Change the package path

This commit is contained in:
2018-02-20 06:40:29 +11:00
parent 0450ebf427
commit 51f152cca4
48 changed files with 0 additions and 3 deletions

View File

@@ -0,0 +1,539 @@
#' @include ROBITools.R
#' @include s3objects.R
#' @import ROBITaxonomy
NULL
require(ROBITaxonomy)
#
# FOR THE DEVELOPPER : we have to check that the code doesn't relies on the
# fact that the xx@samples$sample column is not always
# identical to the rownames(xx@samples)
setClassUnion("characterOrNULL",c("character","NULL"))
setClassUnion("matrixOrfactorL",c("matrix","factor"))
#
# We specialize data.frame in two subclasses motus.frame and samples.frame
# for this we add to function insuring the type checking and the cast from
# data.frame
#
is.motus.frame= function(x) any(class(x)=="motus.frame")
is.samples.frame= function(x) any(class(x)=="samples.frame")
as.motus.frame= function(x) {
if (! is.data.frame(x))
stop("only cast from data.frame is allowed")
if (! is.motus.frame(x))
x = addS3Class(x,"motus.frame")
return(x)
}
as.samples.frame= function(x) {
if (! is.data.frame(x))
stop("only cast from data.frame is allowed")
if (! is.samples.frame(x))
x = addS3Class(x,"samples.frame")
return(x)
}
samples.frame=as.samples.frame
motus.frame=as.motus.frame
as.factor.or.matrix = function(x) {
if (is.matrix(x))
return(x)
if (is.factor(x)){
if (length(dim(x))!=2)
stop('Just factor with two dimensions are allowed')
return(x)
}
if (!is.data.frame(x))
stop('Just matrix, 2D factor and data.frame can be casted')
tps = sapply(x,class)
allna = sapply(x, function(y) all(is.na(y)))
if (all(tps==tps[[1]] | allna)) {
tps = tps[[1]]
}
else
stop('all the column of the data.frame must have the same type')
tps = tps[[1]]
x = as.matrix(x)
dx = dim(x)
if (tps=='factor')
x = factor(x)
dim(x)=dx
return(x)
}
#' DNA metabarcoding experiment description class
#'
#' A S4 class describing a DNA metabarcoding experiment. It groups
#' three data frames describing samples, motus and occurrences of
#' MOTUs per sample
#'
#'@section Slots:
#' \describe{
#' \item{\code{reads}:}{Matrix of class \code{"numeric"},
#' containing the counts of reads per samples
#' \itemize{
#' \item{1 samples per line}
#' \item{1 sequence per column}
#' }
#' }
#'
#' \item{\code{samples}:}{Object of class \code{"data.frame"}, describing samples
#' \itemize{
#' \item{1 samples per line}
#' \item{1 property per column}
#' }
#' }
#'
#' \item{\code{motus}:}{Object of class \code{"data.frame"}, describing MOTUs (sequences)
#' \itemize{
#' \item{1 MOTU per line}
#' \item{1 property per column}
#' }
#' }
#'
#' \item{\code{layers}:}{Object of class \code{"list"}, containing a set of data layers
#' linking motus and samples. Each element of the list is a matrix
#' of the same size than the \code{read} slot with
#' \itemize{
#' \item{1 samples per line}
#' \item{1 sequence per column}
#' }
#' }
#'
#' \item{\code{scount}:}{Object of class \code{"integer"}, containing the count of sample}
#'
#' \item{\code{mcount}:}{Object of class \code{"integer"}, containing the count of MOTUs}
#'
#' \item{\code{sample.margin}:}{Vector of class \code{"numeric"}, describing the total count of
#' sequence per sample. By default this slot is set by applying sum
#' to the reads data.frame lines}
#'
#' \item{\code{taxonomy}:}{Object of class \code{"taxonomy.obitools"}, linking the DNA metabarcoding
#' experiment to a taxonomy}
#'
#' \item{\code{taxid}:}{Vector of class \code{"character"}, list of MOTUs' attributes to manage as taxid}
#' }
#'
#' @seealso \code{\link{taxonomy.obitools}},
#' @name metabarcoding.data
#' @rdname metabarcoding-data-class
#' @keywords DNA metabarcoding
#' @author Eric Coissac
#' @exportClass metabarcoding.data
setClass("metabarcoding.data",
#
# Attribute declaration
#
representation(reads = "matrix",
samples = "data.frame",
motus = "data.frame",
layers = "list",
scount = "integer",
mcount = "integer",
sample.margin = "numeric",
taxonomy = "obitools.taxonomyOrNULL",
taxid = "characterOrNULL"
),
#
# Check object structure
#
validity = function(object) {
## object : nom reserve !
#
# Check that reads / samples and motus data.frames
# have compatible sizes
#
# reads line count = samples line count
# reads column count = motus line count
rsize = dim(object@reads)
ssize = dim(object@samples)
msize = dim(object@motus)
csize = length(object@sample.margin)
if (rsize[1] != ssize[1] &
rsize[2] != msize[1] &
rsize[1] != csize)
return(FALSE)
# if no layer, object is ok
if (length(object@layers)==0)
return(TRUE)
# otherwise we check the size of each layer as we
# did for reads
return(! any(sapply(object@layers,
function(l) any(dim(l)!=c(ssize[1],msize[1])))))
}
)
#
#' metabarcoding.data constructor
#'
#' @docType methods
#' @rdname initialize-methods
#' @aliases initialize-methods,metabarcoding.data
setMethod("initialize",
"metabarcoding.data",
function(.Object, reads,samples,motus,
taxonomy=NULL,taxid=NULL,
sample.margin=NA,
layers=list()) {
rn = rownames(reads)
cn = colnames(reads)
.Object@reads <- reads
# .Object@samples <- as.samples.frame(samples)
.Object@samples <- samples
row.names(.Object@samples) = rn
#.Object@motus <- as.motus.frame(motus)
.Object@motus <- motus
row.names(.Object@motus) = cn
# Set colnames and rownames to each layers
layers = lapply(layers, function(x) {colnames(x)=cn
rownames(x)=rn
return(x)})
.Object@layers <- layers
# Precompute sample count and motu count
.Object@scount = dim(.Object@samples)[1]
.Object@mcount = dim(.Object@motus)[1]
.Object@taxonomy = taxonomy
.Object@taxid = taxid
if (is.null(sample.margin))
.Object@sample.margin = rowSums(reads)
else
.Object@sample.margin = sample.margin
names(.Object@sample.margin) = rn
validObject(.Object) ## valide l'objet
return(.Object)
})
#
# metabarcoding.data getters
#
#' @export
setGeneric("reads", function(obj) {
return(standardGeneric("reads"))
})
#' Extracts the matrix describing MOTUs abondances
#'
#' Extract the the matrix describing MOTUs abondances (read counts)
#' from a \code{\link{metabarcoding.data}} instance.
#'
#' @param obj a \code{\link{metabarcoding.data}} instance
#' @return a matrix containing data about reads
#'
#' @examples
#' # load termite data set from the ROBITools sample data
#' data(termes)
#'
#' # Extract the matrix describing MOTUs abondances
#' d = reads(termes)
#'
#' head(d)
#'
#' @seealso \code{\link{metabarcoding.data}},
#' \code{\link{motus}}, \code{\link{samples}}
#'
#' @docType methods
#' @rdname read-methods
#' @aliases read-methods,metabarcoding.data
#' @author Eric Coissac
#'
setMethod("reads", "metabarcoding.data", function(obj) {
return(obj@reads)
})
# get samples data.frames
#' @export
setGeneric("samples", function(obj) {
return(standardGeneric("samples"))
})
#' Extracts the samples description data.frame
#'
#' Extract the sample description data.frame from a
#' \code{\link{metabarcoding.data}} instance.
#'
#' @param obj a \code{\link{metabarcoding.data}} instance
#' @return a data.frame containing data about sample
#'
#' @examples
#' # load termite data set from the ROBITools sample data
#' data(termes)
#'
#' # Extract the data frame describing samples
#' d = samples(termes)
#'
#' head(d)
#'
#' @seealso \code{\link{metabarcoding.data}},
#' \code{\link{motus}}, \code{\link{reads}}
#'
#' @docType methods
#' @rdname samples-methods
#' @aliases samples-methods,metabarcoding.data
#' @author Eric Coissac
#'
setMethod("samples", "metabarcoding.data", function(obj) {
return(obj@samples)
})
#' @export
setGeneric("motus", function(obj) {
return(standardGeneric("motus"))
})
#' Extracts the MOTU descriptions \code{data.frame}
#'
#' Extract the MOTUs description \code{data.frame} from a
#' \code{\link{metabarcoding.data}} instance.
#'
#' @param obj a \code{\link{metabarcoding.data}} instance
#' @return a data.frame containing data about MOTU
#'
#' @examples
#' # load termite data set from the ROBITools sample data
#' data(termes)
#'
#' # Extract the data.frame describing MOTUs
#' d = motus(termes)
#'
#' head(d)
#'
#' @seealso \code{\link{metabarcoding.data}},
#' \code{\link{reads}}, \code{\link{samples}}
#'
#' @docType methods
#' @rdname motu-methods
#' @aliases motu-methods,metabarcoding.data
#'
setMethod("motus", "metabarcoding.data", function(obj) {
return(obj@motus)
})
# get sample count
setGeneric("sample.count", function(obj) {
return(standardGeneric("sample.count"))
})
setMethod("sample.count", "metabarcoding.data", function(obj) {
return(obj@scount)
})
# get motu count
setGeneric("motu.count", function(obj) {
return(standardGeneric("motu.count"))
})
setMethod("motu.count", "metabarcoding.data", function(obj) {
return(obj@mcount)
})
# dim method
setMethod("dim", "metabarcoding.data", function(x) {
return(c(x@scount,x@mcount))
})
setMethod('[', "metabarcoding.data", function(x,i=NULL,j=NULL,...,drop=TRUE) {
# special case if samples are not specified (dimension 1)
if (!hasArg(i))
i = 1:x@scount
# special case if motus are not specified (dimension 2)
if (!hasArg(j))
j = 1:x@mcount
# special case if the layer attribut is specified
args = list(...)
if (!is.null(args$layer))
return(x[[args$layer]][i,j])
#####################
#
# normal case
#
r = x@reads[i,j,drop=FALSE]
if (sum(dim(r) > 1)==2 | ! drop)
{
# we do the selection on the motus and samples description data.frame
m = x@motus[j,,drop=FALSE]
s = x@samples[i,,drop=FALSE]
# we do the selection on each layers
l = lapply(x@layers,function(l) l[i,j,drop=FALSE])
newdata = copy.metabarcoding.data(x, reads=r, samples=s, motus=m, layers=l)
}
else
{
newdata = as.numeric(x@reads[i,j])
}
return(newdata)
})
setMethod('[<-', "metabarcoding.data",
function (x, i, j, ..., value) {
if (!hasArg(i))
i = 1:x@scount
if (!hasArg(j))
j = 1:x@mcount
args = list(...)
if (is.null(args$layer))
x@reads[i, j]=value
else
x[[args$layer]][i,j]=value
return(x)
})
#################################################
#
# User interface function to create
# metabarcoding.data objects
#
#################################################
#'@export
metabarcoding.data = function(reads,samples,motus,
taxonomy=NULL,taxid=NULL,
sample.margin=NULL,
layers=list()) {
rd = new('metabarcoding.data',
reads=reads,
samples=samples,
motus=motus,
taxonomy=taxonomy,
taxid=taxid,
sample.margin=sample.margin,
layers=layers
)
return(rd)
}
copy.metabarcoding.data = function(data,
reads=NULL,
samples=NULL,motus=NULL,
taxonomy=NULL,taxid=NULL,
sample.margin=NULL,
layers=NULL) {
if (is.null(reads))
reads = data@reads
if (is.null(samples))
samples = data@samples
if (is.null(motus))
motus = data@motus
if (is.null(taxonomy))
taxonomy = data@taxonomy
if (is.null(taxid))
taxid = data@taxid
if (is.null(sample.margin))
sample.margin = data@sample.margin
if (is.null(layers))
layers = data@layers
rd = new('metabarcoding.data',
reads=reads,
samples=samples,
motus=motus,
taxonomy=taxonomy,
taxid=taxid,
sample.margin=sample.margin,
layers=layers
)
return(rd)
}
#' @export
setGeneric('rownames')
#' @export
setMethod("rownames", "metabarcoding.data", function(x, do.NULL = TRUE, prefix = "col") {
return(rownames(x@reads,do.NULL,prefix))
})
#' @export
setGeneric('colnames')
#' @export
setMethod("colnames", "metabarcoding.data", function(x, do.NULL = TRUE, prefix = "col") {
return(colnames(x@reads,do.NULL,prefix))
})

33
R/ROBITools.R Normal file
View File

@@ -0,0 +1,33 @@
#' A package to manipulate DNA metabarcoding data.
#'
#' This package was written as a following of the OBITools.
#'
#' \tabular{ll}{
#' Package: \tab ROBITools\cr
#' Type: \tab Package\cr
#' Version: \tab 0.1\cr
#' Date: \tab 2013-06-27\cr
#' License: \tab CeCILL 2.0\cr
#' LazyLoad: \tab yes\cr
#'}
#'
#' @name ROBITools-package
#' @aliases ROBITools
#' @docType package
#' @title A package to manipulate DNA metabarcoding data.
#' @author Frederic Boyer
#' @author Aurelie Bonin
#' @author Lucie Zinger
#' @author Eric Coissac
#'
#' @references http://metabarcoding.org/obitools
#'
NA
.onLoad <- function(libname, pkgname) {
packageStartupMessage( "ROBITools package" )
#print(getwd())
}

229
R/aggregate.R Normal file
View File

@@ -0,0 +1,229 @@
#' @include 02_class_metabarcoding.data.R
NULL
# TODO: Add comment
#
# Author: coissac
###############################################################################
#' @export
aggregate.metabarcoding.data=function(x, by, FUN,...,
MARGIN='sample',
default.layer=NULL,
layers=NULL) {
uniq.value = function(z) {
if (is.null(z) |
any(is.na(z)) |
length(z)==0)
ans = NA
else {
if (all(z==z[1]))
ans = z[1]
else
ans = NA
}
if (is.factor(z))
ans = factor(ans,levels=levels(z))
return(ans)
}
#
# Deals with the supplementaty aggregate arguments
#
if (is.null(default.layer))
default.layer=uniq.value
if (is.null(layers)) {
layers = as.list(rep(c(default.layer),length(x@layers)))
names(layers)=layer.names(x)
}
else {
for (n in layer.names(x))
if (is.null(layers[[n]]))
layers[[n]]=default.layers
}
if (MARGIN == 'sample')
MARGIN=1
if (MARGIN == 'motu')
MARGIN=2
reads = x@reads
if (MARGIN==1) {
# prepare the aggrevation arguments for the read table
# from the function arguments
dotted = list(...)
if (length(dotted) > 0)
aggr.args = list(reads,by=by,FUN=FUN,...=dotted,simplify=FALSE)
else
aggr.args = list(reads,by=by,FUN=FUN,simplify=FALSE)
# Aggregate the read table
ragr = do.call(aggregate,aggr.args)
# extrat new ids from the aggregated table
ncat = length(by)
ids = as.character(interaction(ragr[,1:ncat,drop=FALSE]))
# remove the aggregations modalities to rebuild a correct
# reads table
ragr = as.matrix(ragr[,-(1:ncat),drop=FALSE])
dragr= dim(ragr)
cragr= colnames(ragr)
ragr = as.numeric(ragr)
dim(ragr)=dragr
colnames(ragr)=cragr
rownames(ragr)=ids
#
# Apply the same aggragation to each layer
#
ln = layer.names(x)
la = vector(mode="list",length(ln))
names(la)=ln
for (n in ln) {
f = layers[[n]]
if (is.factor(x[[n]])){
isfact = TRUE
lf = levels(x[[n]])
df = dim(x[[n]])
m = matrix(as.character(x[[n]]))
dim(m)=df
}
else
m = x[[n]]
aggr.args = list(m,by=by,FUN=f,simplify=FALSE)
lagr = do.call(aggregate,aggr.args)
lagr = as.factor.or.matrix(lagr[,-(1:ncat),drop=FALSE])
if (isfact){
df = dim(lagr)
lagr = factor(lagr,levels=lf)
dim(lagr)=df
}
rownames(lagr)=ids
la[[n]]=lagr
}
# aggragate the sample table according to the same criteria
#
# TODO: We have to take special care of factors in the samples
# data.frame
sagr = aggregate(samples(x),by,uniq.value,simplify=FALSE)
# move the first columns of the resulting data frame (the aggregations
# modalities to the last columns of the data.frame
sagr = sagr[,c((ncat+1):(dim(sagr)[2]),1:ncat),drop=FALSE]
larg = c(lapply(sagr,unlist),list(stringsAsFactors=FALSE))
sagr = do.call(data.frame,larg)
# set samples ids to the ids computed from modalities
sagr$id=ids
rownames(sagr)=ids
# build the new metabarcoding data instance
newdata = copy.metabarcoding.data(x,reads=ragr,samples=sagr)
}
else {
# prepare the aggregation arguments for the read table
# from the function arguments
# BECARFUL : the reads table is transposed
# standard aggregate runs by row and we want
# aggregation by column
dotted = list(...)
if (length(dotted) > 0)
aggr.args = list(t(reads),by=by,FUN=FUN,...=dotted,simplify=FALSE)
else
aggr.args = list(t(reads),by=by,FUN=FUN,simplify=FALSE)
# Aggregate the read table
ragr = do.call(aggregate.data.frame,aggr.args)
# extrat new ids from the aggregated table
ncat = length(by)
ids = as.character(interaction(ragr[,1:ncat,drop=FALSE]))
# remove the aggregations modalities to rebuild a correct
# reads table
ragr = t(ragr[,-(1:ncat),drop=FALSE])
dragr= dim(ragr)
rragr= rownames(ragr)
ragr = as.numeric(ragr)
dim(ragr)=dragr
colnames(ragr)=ids
rownames(ragr)=rragr
#
# Apply the same aggragation to each layer
#
ln = layer.names(x)
la = vector(mode="list",length(ln))
names(la)=ln
for (n in ln) {
f = layers[[n]]
if (is.factor(x[[n]])){
isfact = TRUE
lf = levels(x[[n]])
df = dim(x[[n]])
m = matrix(as.character(x[[n]]))
dim(m)=df
}
else
m = x[[n]]
aggr.args = list(t(m),by=by,FUN=f,simplify=FALSE)
lagr = do.call(aggregate,aggr.args)
lagr = t(as.factor.or.matrix(lagr[,-(1:ncat),drop=FALSE]))
if (isfact){
df = dim(lagr)
lagr = factor(lagr,levels=lf)
dim(lagr)=df
}
colnames(lagr)=ids
la[[n]]=lagr
}
# aggragate the motus table according to the same criteria
magr = aggregate(motus(x),by,uniq.value,simplify=FALSE)
# move the first columns of the resulting data frame (the aggregations
# modalities to the last columns of the data.frame
magr = magr[,c((ncat+1):(dim(magr)[2]),1:ncat),drop=FALSE]
larg = c(lapply(magr,unlist),list(stringsAsFactors=FALSE))
magr = do.call(data.frame,larg)
# set motus ids to the ids computed from modalities
magr$id=ids
rownames(magr)=ids
# build the new metabarcoding data instance
newdata = copy.metabarcoding.data(x,reads=ragr,motus=magr,layers=la)
}
return(newdata)
}

107
R/choose.taxonomy.R Normal file
View File

@@ -0,0 +1,107 @@
#' @import ROBITaxonomy
#' @include 02_class_metabarcoding.data.R
NULL
#' Choose between databases for taxonomic classifications
#'
#' Chooses a sequence taxonomic assignment in order of preference for the different
#' reference databases that have been used when the assignment is above a certain threshold
#'
#'
#' @param x a \code{\link{metabarcoding.data}} object
#' @param taxonomy a \code{\linkS4class{taxonomy.obitools}} instance
#' @param dbrank string or vector indicating reference database names ranked by order of preference
#' @param thresh a best_identity threshold for applying priority. Default is \code{0.95}
#'
#' @return returns a data.frame with the refined taxonomic assignement and classic taxonomy description.
#'
#' @examples
#'
#' data(termes)
#'
#' taxo=default.taxonomy()
#'
#' #create artificial taxonomic assignments
#' attr(termes, "motus")["best_identity:DB1"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T)
#' attr(termes, "motus")["best_identity:DB2"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T)
#' attr(termes, "motus")["best_identity:DB3"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T)
#' attr(termes, "motus")["taxid_by_db:DB1"] = termes$motus$taxid
#' attr(termes, "motus")["taxid_by_db:DB2"] = sample(termes$motus$taxid,size=nrow(termes$motus), replace=F)
#' attr(termes, "motus")["taxid_by_db:DB3"] = sample(termes$motus$taxid,size=nrow(termes$motus), replace=F)
#'
#' #Run taxo.decider
#' termes.ok = taxo.decider(termes, taxo, "DB2", 0.95)
#' head(termes.ok$motus[union(grep("DB", colnames(termes.ok$motus)), grep("_ok", colnames(termes.ok$motus)))])
#'
#' termes.ok = taxo.decider(termes, taxo, c("DB3", "DB1"), 0.95)
#' head(termes.ok$motus[union(grep("DB", colnames(termes.ok$motus)), grep("_ok", colnames(termes.ok$motus)))])
#'
#' #Quick look at the enhancement in taxonomic assignements
#' par(mfrow=c(1,4))
#' for(i in grep("best_identity.", colnames(termes.ok$motus))){
#' hist(termes.ok$motus[,i], breaks=20, ylim=c(1,21), main=colnames(termes.ok$motus)[i], xlab="assignment score")
#' }
#'
#' @seealso \code{\linkS4class{taxonomy.obitools}}, and methods \code{\link{species}},\code{\link{genus}}, \code{\link{family}},\code{\link{kingdom}},
#' \code{\link{superkingdom}},\code{\link{taxonatrank}}, \code{\link{taxonmicank}}
#'
#' @author Lucie Zinger
#' @keywords taxonomy
#'
#' @export
#'
taxo.decider = function(x, taxonomy, dbrank, thresh=0.95) {
noms = colnames(x$motus)
best_ids_names = noms[grep("best_identity.", noms)]
best_ids = x$motus[,best_ids_names]
taxids = x$motus[, gsub("best_identity", "taxid_by_db", best_ids_names)]
dbs = unlist(lapply(strsplit(best_ids_names, "\\:"), "[[", 2))
#Set max indices
ind = as.vector(t(apply(best_ids,1,function(y) order(rank(-y, ties.method="max"), match(dbrank, dbs))))[,1])
#Set default vector: db, bestids, taxids with max score
db_ok = dbs[ind]
best_identity_ok = best_ids[cbind(1:length(ind), ind)]
taxids_by_db_ok = taxids[cbind(1:length(ind), ind)]
#Get vector of db index that should be used according to condition > thresh
db_choice = taxo.decider.routine(dbrank, best_ids, dbs, thresh)
#Replacing by right values according to db_ok
for(i in 1:length(dbrank)){
db_ok[which(db_choice==i)] = dbrank[i]
best_identity_ok[which(db_choice==i)] = best_ids[which(db_choice==i),grep(dbrank[i], colnames(best_ids))]
taxids_by_db_ok[which(db_choice==i)] = taxids[which(db_choice==i),grep(dbrank[i], colnames(taxids))]
}
decision = data.frame(db_ok, best_identity_ok, taxids_by_db_ok)
coltaxid = colnames(decision)[grep("taxid", colnames(decision))]
attr(x, "motus") = data.frame(x$motus, decision)
new.tax = get.classic.taxonomy(x, taxonomy, coltaxid)
attr(x, "motus") = data.frame(x$motus, new.tax)
return(x)
}
taxo.decider.routine = function(dbrank, best_ids, dbs, thresh) {
#Setting mask
mask = matrix(NA,nrow(best_ids),length(dbrank))
colnames(mask)=dbrank
#For each DB, see if condition T/F
for(i in dbrank){
mask[,i] = best_ids[,which(dbs==i)]>thresh
}
#Get the first occurence of T in the table
out = apply(mask, 1, function(x) which(x==T)[1])
return(out)
}

49
R/contaslayer.R Normal file
View File

@@ -0,0 +1,49 @@
#' @include 02_class_metabarcoding.data.R
NULL
#' Detects contaminants in metabarcoding data
#'
#' Detects sequences/motus in a \code{\link{metabarcoding.data}} object
#' for which frequencies over the entire dataset are maximum in negative controls and
#' hence, most likely to be contaminants.
#'
#'
#' @param x a \code{\link{metabarcoding.data}} object
#' @param controls a vector of samples names where conta are suspected to be detected
#' (typically negative control names).
#' @param clust a vector for grouping sequences. Default set to \code{NULL}.
#'
#' @return a vector containing the names of sequences identified as contaminants
#'
#' @examples
#'
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' neg = rownames(termes.ok)[grep("r",rownames(termes.ok))]
#'
#' #finds contaminants based on neg samples
#' contaslayer(termes.ok, neg)
#'
#' # extanding contamininant detection with grouping factor,
#' # typically obiclean/sumatra cluster or taxonomy membership
#' contaslayer(termes.ok, neg, termes.ok$motus$scientific_name)
#'
#' @seealso \code{\link{threshold}} for further trimming
#' @author Lucie Zinger
#' @export
contaslayer = function(x,controls,clust=NULL){
x.fcol = normalize(x, MARGIN=2)$reads
x.max = rownames(x.fcol[apply(x.fcol, 2, which.max),])
conta = colnames(x)[!is.na(match(x.max,controls))]
if (length(clust)!=0) {
agg = data.frame(conta.id=colnames(x.fcol), clust)
conta.ext = agg$conta.id[which(!is.na(match( agg$clust, agg$clust[match(conta,agg$conta.id)])))]
return(as.vector(conta.ext))
}
else {
return(conta)
}
}

178
R/distrib.extrapol.R Normal file
View File

@@ -0,0 +1,178 @@
#' @include 02_class_metabarcoding.data.R
NULL
#' Read frequencies krigging
#'
#' Extrapolates read frequencies from a \code{\link{metabarcoding.data}} object in space for a finer resolution
#'
#' @param x a vector or matrix from a row-normalized read table
#' \code{\link{metabarcoding.data}} object
#' @param min.coord a vector of length = 2 indicating the minimum values of x and y
#' coordinates to be used for the predicted grid
#' @param max.coord a vector of length = 2 indicating the maximum values of x and y
#' coordinates to be used for the predicted grid
#' @param grid.grain an integer indicating the resolution (i.e. nb of subpoints) in x and y
#' coordinates required for the predicted grid
#' @param coords a dataframe containing the x and y coordinates of the abundances
#' from x to be extrapolated.
#' @param otus.table a motus data.frame containing motus informations of x
#' @param cutoff a cutoff below which abundances are set to 0.
#' This threshold also determines the value to be added to 0 values for log10
#' transformation
#' @param return.metabarcoding.data if \code{TRUE}, returns a \code{\link{metabarcoding.data}} object. Default is \code{FALSE}
#'
#' @return either a dataframe or a S3 object with a structure similar to \code{\link{metabarcoding.data}} object.
#' The number of samples corresponds to the predicted points.
#' The two last columns (if \code{return.metabarcoding.data==F}) or sample data.frame contains x y coordinates of the predicted grid
#' The all but last two columns (if \code{return.metabarcoding.data==F}) or read matrix contains the predicted log10 transformed relative abundances
#' instead of reads counts
#' If \code{return.metabarcoding.data==F} the motus data.frame contains the motus informations from x
#'
#' @examples
#'
#' data(termes)
#' #Create dummy spatial coordinates
#' attr(termes, "samples")[c("x", "y")] = expand.grid(1:7,1:3)
#'
#' #compute frequencies
#' attr(termes, "layers")[["reads.freq"]] = normalize(termes, MARGIN=1)$reads
#'
#' # Getting extrapolations
#' termes.pred = extrapol.freq(attr(termes, "layers")[["reads.freq"]], min.coord=c(1,1), max.coord=c(7,3),
#' grid.grain=100,termes$samples[,c("x", "y")], termes$motus, cutoff=1e-3)
#'
#' head(termes.pred$reads)
#' @seealso \code{\link{map.extrapol.freq}} as well as \code{sp} and \code{gstat} packages
#' @author Lucie Zinger
#' @export
extrapol.freq = function(x, min.coord, max.coord, grid.grain=100, coords, otus.table, cutoff=1e-3, return.metabarcoding.data = FALSE) {
require(gstat)
require(sp)
#predicted grid setting
new.x = seq(min.coord[1], max.coord[1], length.out = grid.grain)
new.y = seq(min.coord[2], max.coord[2], length.out = grid.grain)
grid.p=expand.grid(new.x, new.y)
colnames(grid.p)=c("x", "y")
S=sp::SpatialPoints(grid.p); sp::gridded(S)<-TRUE
m=gstat::vgm(50, "Exp", 100)
#krigging
preds = apply(x, 2, function(otu) {
otu[otu<cutoff] = cutoff
spj=cbind(coords,otu)
colnames(spj)=c("x", "y", "otu")
spj.g=gstat::gstat(id="Log10.freq", formula=log10(otu)~1,locations=~x+y,data=spj,model=m)
gstat::predict.gstat(spj.g, grid.p, quiet=T)$Log10.freq.pred
})
#formatting the output
colnames(preds) = rownames(otus.table)
rownames(preds) = paste("s", 1:nrow(grid.p), sep=".")
row.names(grid.p) = rownames(preds)
if(return.metabarcoding.data==F) {
out = data.frame(preds, grid.p)
} else{
out = metabarcoding.data(preds, grid.p, otus.table)
}
return(out)
}
#' Maps of krigged log10-transformed frequencies
#'
#' Maps the output of extrapol.freq
#'
#'
#' @param x an extrapol.freq output
#' @param path the path of the folder to export the map. Default is \code{NULL} and map is printed in Rplot/quartz
#' @param col.names a vector containing the names of the columns to be used for defining the file name. Typically
#' the column names containing the taxonomic information and/or sequence/motus id.
#' @param index an integer indicating column number of the motu/sequence to be plotted.
#' @param cutoff lower motu frequency accepted to consider motu abundance as different
#' from 0. Should be the same than the one used in extrapol.freq
#' @param add.points a 3-column data.frame containing factor levels and associated x and y coordinates
#' to be added to the map. Typically taxa observed in the field.
#' @param adj a value used for adjusting text position in the map. Default is \code{4}
#'
#' @return a map/png file displaying motus distribution.
#'
#' @examples
#'
#' data(termes)
#' attr(termes, "samples")[c("x", "y")] = expand.grid(1:7,1:3)
#'
#' #compute frequencies
#' attr(termes, "layers")[["reads.freq"]] = normalize(termes, MARGIN=1)$reads
#'
#' # Getting extrapolations
#' termes.pred = extrapol.freq(attr(termes, "layers")[["reads.freq"]],
#' grid.grain=100,termes$samples[,c("x", "y")], termes$motus, cutoff=1e-3)
#'
#' #mapping the distribution of the 3 most abundant sequences (caution, mfrow does not work for lattice's levelplot)
#' map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 1, cutoff=1e-3)
#' map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 2, cutoff=1e-3)
#' map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 3, cutoff=1e-3)
#'
#' #dummy observationnal data
#' termes.obs = data.frame(x=c(2,3,5), y=c(2.7,2,2.6), taxa = rep("Isoptera Apicotermitinae", 3))
#' map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 3, cutoff=1e-3, add.points=termes.obs)
#'
#' @seealso \code{\link{extrapol.freq}}, and \code{levelplot} from \code{lattice} package
#' @author Lucie Zinger
#' @export
map.extrapol.freq = function(x, path=NULL, col.name=NULL, index, cutoff=1e-3, add.points=NULL, adj=4) {
require(lattice)
if(!is.null(path)) {
x.motus = apply(x$motus,2,as.character)
name = gsub("\\.", "_", paste(gsub(", ", "_", toString(x.motus[index,col.name])), x.motus[index,"id"], sep="_"))
file.out = paste(path, "/", name, ".png", sep="")
}
z=x$reads[,index]
z[abs(z)>abs(log10(cutoff))]=log10(cutoff)
z[z>0] = 0
spj=as.data.frame(cbind(x$samples,z))
colnames(spj)=c("x", "y", "z")
map.out=levelplot(z~x+y, spj, col.regions=topo.colors(100),
at=seq(log10(cutoff),log10(1), by=0.2),
colorkey=list(at=seq(log10(cutoff),log10(1), by=0.2),
labels=list(at=seq(log10(cutoff),log10(1), by=0.2),
labels=round(10^seq(log10(cutoff),log10(1), by=0.2),3))),
aspect = "iso", contour=F, main=list(label=x$motus[index, "id"], cex=0.7))
if(!is.null(path)) {
png(file=file.out, width=800, height=800)
print(map.out)
if(!is.null(add.points)) {
n = (max(spj[,"y"])-min(spj["y"]))/length(unique(spj[,"y"]))*adj
trellis.focus("panel", 1, 1, highlight=FALSE)
lpoints(add.points[,"x"], add.points[,"y"], cex=0.7, lwd=3, col="red")
ltext(add.points[,"x"], add.points[,"y"]+n, add.points[,-match(c("x", "y"), colnames(add.points))], col="red", cex=1.5)
trellis.unfocus()
}
dev.off()
} else {
print(map.out)
if(!is.null(add.points)) {
n = (max(spj[,"y"])-min(spj["y"]))/length(unique(spj[,"y"]))*adj
trellis.focus("panel", 1, 1, highlight=FALSE)
lpoints(add.points[,"x"], add.points[,"y"], cex=0.7, lwd=3, col="red")
ltext(add.points[,"x"], add.points[,"y"]+n, add.points[,-match(c("x", "y"), colnames(add.points))], col="red", cex=1)
trellis.unfocus()
}
}
}

206
R/experimental.section.R Normal file
View File

@@ -0,0 +1,206 @@
#' @include 02_class_metabarcoding.data.R
NULL
#11.03.2011
#L.Zinger
#######################
#function anosim.pw
#######################
#computes pairwise anosim computation
#input:
#dat: dissimilarity matrix
#g: factor defining the grouping to test
#permutations: nb of permutation to access anosim statistics
#p.adjust.method: method of correction for multiple-testing
#
#output: a distance-like table containing:
#in the upper triangle: the anosims R values
#in the lower triangle: the adjusted p-values
### start
anosim.pw<-function(dat, g, permutations, p.adjust.method, ...) {
require(vegan)
#data.trasformation
dat<-as.matrix(dat)
g<-factor(g)
#empty object for result storage
ano<-matrix(NA, nrow=nlevels(g), ncol=nlevels(g), dimnames=list(levels(g),levels(g)))
p.val.tmp<-NULL
#running anosims
for(i in 1:(nlevels(g)-1)) for(j in (i+1):nlevels(g)){
tmp<-anosim(as.dist(dat[c(which(g==levels(g)[i]),which(g==levels(g)[j])),
c(which(g==levels(g)[i]),which(g==levels(g)[j]))]),
c(rep(levels(g)[i], length(which(g==levels(g)[i]))),
rep(levels(g)[j], length(which(g==levels(g)[j])))), permutations)
ano[i,j]<-tmp$statistic
p.val.tmp<-append(p.val.tmp, tmp$signif)
}
#p value correction for multiple comparison
p.val.tmp<-p.adjust(p.val.tmp, p.adjust.method )
#put the corrected p values in the anosim table
tmp<-NULL
tmp2<-NULL
for(i in 1:(nlevels(g)-1)) for(j in (i+1):nlevels(g)){
tmp<-append(tmp,i)
tmp2<-append(tmp2,j)
}
for(i in 1:length(p.val.tmp)){
ano[tmp2[i],tmp[i]]<-p.val.tmp[i]}
return(ano)
}
### end
#23 Nov 2012
#L.Zinger
###################
#function MOTUtable
###################
# Generates ready-to-use MOTU tables and basic statistics on samples (i.e. sequencing depth, raw richness, and invsimpson index)
#input:
#x: an obitable output (samples should be indicated as e.g. "sample.A01r" in column names)
#y: the column name by which that data are to be aggregated. Should be e.g. "cluster" or "species_name"
#outputs:
#x.otu: the ready-to-use MOTU table
#x.rawstats: basic statistics on samples
### start
MOTUtable<-function(x, y) {
require(vegan)
nom<-as.character(substitute(x))
tmp<-x[,c(grep(y, colnames(x)), grep("sample", colnames(x)))]
tmp2<-t(aggregate(tmp[,-1], by=list(tmp[,1]), sum))
x.otu<-tmp2[-1,]
colnames(x.otu)<-paste(y,tmp2[1,], sep=".")
x.rawstats<-data.frame(Nb_ind=rowSums(x.otu), Raw_richness=specnumber(x.otu, MARGIN=1), Raw_eveness=diversity(x.otu, "invsimpson", MARGIN=1) )
#may have a pb in the rowSums depending on the R version (allows or not non-numeric)
assign(paste(nom, y, sep="."),x.otu,env = .GlobalEnv)
assign(paste(nom, y, "rawstats", sep="."),x.rawstats,env = .GlobalEnv)
}
### end
#26 Nov 2012
#F.Boyer
###################
#function reads.frequency & filter.threshold
###################
#can be used to filter the table of reads to have the sequences that represents at least 95% of the total reads by sample
#
#e.g. reads.treshold(reads.frequency(metabarcodingS4Obj@reads), 0.95)
filter.threshold <- function(v, threshold) {
o <- order(v, decreasing=T)
ind <- which(cumsum(as.matrix(v[o]))>threshold)
v[-o[seq(min(length(o), 1+length(o)-length(ind)))]] <- 0
v
}
reads.threshold <- function (reads, threshold, by.sample=T) {
res <- apply(reads, MARGIN=ifelse(by.sample, 1, 2), filter.threshold, thr=threshold)
if (by.sample) res <- t(res)
data.frame(res)
}
reads.frequency <- function (reads, by.sample=T) {
res <- apply(reads, MARGIN=ifelse(by.sample, 1, 2), function(v) {v/sum(v)})
if (by.sample) res <- t(res)
data.frame(res)
}
#06 Jan 2013
#F.Boyer
###################
#function removeOutliers
###################
#given a contengency table and a distance matrix
#returns the list of samples that should be removed in order to have only
#distances below thresold
#can't return only one sample
#
#e.g. intraBad <- lapply(levels(sample.desc$sampleName), function(group) {samples<-rownames(sample.desc)[sample.desc$sampleName==group]; removeOutliers(contingencyTable[samples,], thr=0.3, distFun = function(x) vegdist(x, method='bray'))})
#require(vegan)
removeOutliers <- function(m, thr=0.3, distFun = function(x) vegdist(x, method='bray') ) {
distMat <- as.matrix(distFun(m))
maxM <- max(distMat)
theBadGuys =c()
while (maxM>thr) {
bad <- apply(distMat, MARGIN=1, function(row, maxM) {any(row==maxM)}, maxM=maxM)
bad <- names(bad)[bad]
bad <- apply(distMat[bad,], MARGIN=1, mean)
badGuy <- names(bad)[bad==max(bad), drop=F][1]
theBadGuys <- c(theBadGuys, badGuy)
stillok <- rownames(distMat) != badGuy
distMat <- distMat[stillok, stillok, drop=F]
maxM <- max(distMat)
}
if (length(theBadGuys) >= (nrow(m)-1)) {
theBadGuys <- rownames(m)
}
theBadGuys
}
#31.05.2013
#L.Zinger
#getAttrPerS, a function allowing to get the values of a sequence attribute per sample
#(e.g. best_identities, etc...) the output is a list with one dataframe per sample.
#This dataframe contains:
# first column (named as attr): the attribute value for each sequence present in the sample
# second column (named weight): the corresponding number of reads in the sample
getAttrPerS=function(x,attr){
#x: a metabarcoding object
#attr: a character object corresponding to the attribute
#for which values per sample are needed (should be equal to a colname in x@motus)
if(class(x)[1]!= "metabarcoding.data") {
stop("x is not a metabarcoding S4 object")
}
if(is.character(attr)==F) {
stop("attr is not a character object")
}
x.motus = motus(x)
x.reads = reads(x)
otu = apply(x.reads, 1, function(y) x.motus[match(names(y[which(y!=0)]),x.motus$id), grep(attr, colnames(x.motus))])
reads = apply(x.reads, 1, function(y) y[which(y!=0)])
output = mapply(cbind, otu, reads)
output = lapply(output, function(y) {
colnames(y)=c(attr,"weight")
return(y)
})
return(output)
}
### end getAttrPerS

62
R/export-metabarcoding.R Normal file
View File

@@ -0,0 +1,62 @@
#' @include 02_class_metabarcoding.data.R
NULL
# TODO: Add comment
#
# Author: coissac
###############################################################################
require(utils)
expand.metabarcoding.data=function(data,minread=1) {
resultonesample=function(sample) {
mo= data@reads[sample,] >= minread
s = data@samples[rep(sample,sum(mo)),]
r = as.numeric(data@reads[sample,mo])
m = data@motus[mo,]
result = data.frame(s,frequency=r,m,
stringsAsFactors =FALSE,
row.names = NULL)
result
}
res = lapply(1:data@scount, resultonesample)
do.call(rbind,res)
}
#setGeneric("utils::write.csv")
write.csv.metabarcoding.data = function(...) {
Call <- match.call(expand.dots = TRUE)
if (!is.null(Call[["minread"]])) {
minread = Call[["minread"]]
Call = Call[!names(Call)=="minread"]
}
else
minread = 1
data = eval.parent(Call[[2L]])
data = expand.metabarcoding.data(data,minread)
Call[[1L]] <- as.name("write.csv")
Call[[2L]] <- as.name("data")
eval(Call)
}
#setGeneric("utils::write.csv2")
write.csv2.metabarcoding.data = function(...) {
Call <- match.call(expand.dots = TRUE)
if (!is.null(Call[["minread"]])) {
minread = Call[["minread"]]
Call = Call[!names(Call)=="minread"]
}
else
minread = 1
data = eval.parent(Call[[2L]])
data = expand.metabarcoding.data(data,minread)
Call[[1L]] <- as.name("write.csv2")
Call[[2L]] <- as.name("data")
eval(Call)
}

106
R/import.metabarcoding.R Normal file
View File

@@ -0,0 +1,106 @@
#' @include read.obitab.R
#' @include 02_class_metabarcoding.data.R
NULL
#' Read a data file produced by the \code{obitab} command
#'
#' Read a data file issued from the conversion of a \strong{fasta}
#' file to a tabular file by the \code{obitab} command of the
#' \strong{OBITools} package
#'
#' @param file a string containing the file name of the obitab file.
#' @param sep Column separator in the obitab file.
#' The default separator is the tabulation.
#' @param sample A regular expression allowing to identify columns
#' from the file describing abundances of sequences per sample
#' @param sample.sep Separator between combined sample name.
#' @param attribute Separator used to split between sample 'tag' and sample name.
#'
#' @return a \code{\link{metabarcoding.data}} instance
#'
#' @examples
#' require(ROBITools)
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
#'
#' # read the termes.tab file
#' termes=import.metabarcoding.data('termes.tab')
#'
#' # print the number of samples and motus described in the file
#' dim(termes)
#'
#' @seealso \code{\link{metabarcoding.data}}
#'
#' @author Eric Coissac
#' @keywords DNA metabarcoding
#' @export
#'
import.metabarcoding.data = function(file,sep='\t',sample="sample",sample.sep="\\.",attribute=":") {
data=read.obitab(file,sep=sep)
# get the colnames matching the sample pattern
column=colnames(data)
pat = paste('(^|',sample.sep,')',sample,'[',sample.sep,attribute,']',sep='')
scol= grep(pat,column)
# reads informations about samples
reads = data[,scol]
names = colnames(reads)
names = strsplit(names,split=attribute)
# for sample name just remove the first part of the col names
# usally "sample:"
sample.names = sapply(names,function(a) paste(a[-1],collapse=attribute))
reads=t(reads)
rownames(reads)=sample.names
# sample's data
sample.data = data.frame(t(data.frame(strsplit(sample.names,split=attribute))))
rownames(sample.data)=sample.names
colnames(sample.data)=strsplit(names[[1]][1],split=attribute)
# motus information
motus = data[,-scol]
motus.id = motus$id
rownames(motus)=motus.id
colnames(reads)=motus.id
return(metabarcoding.data(reads,sample.data,motus))
}
#pcr = gh[,grep('^sample',colnames(gh))]
#pcr.names = colnames(pcr)
#pcr.names = sub('sample\\.','',pcr.names)
#sequencer = rep('Solexa',length(pcr.names))
#sequencer[grep('454',pcr.names)]='454'
#sequencer=factor(sequencer)
#
#tmp = strsplit(pcr.names,'\\.[A-Z](sol|454)\\.')
#
#sample = sapply(tmp,function(x) x[1])
#locality = factor(sapply(strsplit(sample,'_'),function(x) x[1]))
#sample = factor(sample)
#repeats= factor(sapply(tmp,function(x) x[2]))
#
#tmp = regexpr('[A-Z](454|sol)',pcr.names)
#run=factor(substr(pcr.names,tmp,tmp+attr(tmp,"match.length")-1))
#
#pcr.metadata = data.frame(run,sequencer,locality,sample,repeats)
#
#rownames(pcr.metadata)=pcr.names

79
R/import.ngsfilter.R Normal file
View File

@@ -0,0 +1,79 @@
#' @include 02_class_metabarcoding.data.R
NULL
#' Read ngsfilter text file
#'
#' Reads the text file used for assigning reads to samples with the
#' \code{ngsfilter} command of the \strong{OBITools} package.
#'
#' @param file a string containing the file name for the \code{ngsfilter} command.
#' @param platewell a string corresponding to the tag used for storing the sample location
#' in the PCR plate. Should be of the form "nbPlate_Well" (e.g. "01_A02").
#' Default is \code{NULL}
#' @return \code{\link{import.ngsfilter.data}} returns a \code{\link{data.frame}} instance
#'
#' @examples
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
#'
#' data(termes)
#'
#' # reading the termes_ngsfilt.txt file
#' termes.ngs=import.ngsfilter.data('termes_ngsfilt.txt', platewell="position")
#'
#' # including ngsfilter data into termes data
#' attr(termes, "samples") = termes.ngs[rownames(termes),]
#'
#' colnames(termes$samples)
#'
#' @seealso \code{\link{import.metabarcoding.data}} and \code{\link{read.obitab}} for other methods of data importation
#'
#' @author Lucie Zinger
#' @keywords DNA metabarcoding
#' @export
#'
import.ngsfilter.data = function(file, platewell=NULL) {
raw = read.table(file, sep="\t")
#get samples names
names = raw[,2]
#form first part of the output table (default ngsfilter text input)
out = raw[,-c(2,3,ncol(raw))]
colnames(out) = c("Experiment", "primerF", "primerR")
#add tags
out[,c("tagF", "tagR")] = do.call("rbind", strsplit(as.vector(raw[,3]), "\\:"))
#collect nb and names of additionnal information
max.add = max(unlist(lapply(strsplit(gsub("^F @ ","", raw[, ncol(raw)]), "; "), length)))
names.add = unique(unlist(lapply(strsplit(unlist(strsplit(gsub("^F @ ","", raw[, ncol(raw)]), "; ")), "="), "[[",1)))
#form table of additionnal info
form = lapply(strsplit(gsub("^F @ ","", raw[, ncol(raw)]), "; "), strsplit, "=")
additionnals = as.data.frame(do.call("rbind", lapply(form, function(y) {
val = rep(NA, , max.add)
names(val) = names.add
val[match(unlist(lapply(y, "[[", 1)), names(val))] = gsub(";", "",unlist(lapply(y, "[[", 2)))
val
})))
#create PCR plate coordinates
if(!is.null(platewell)) {
form = strsplit(as.vector(additionnals[, platewell]), "_")
nbPlate = as.numeric(gsub("^0", "", unlist(lapply(form, "[[", 1))))
wellPlate = unlist(lapply(form, "[[", 2))
xPlate = as.numeric(gsub("[A-Z]", "", wellPlate))
yPlate = as.numeric(as.factor(gsub("[0-9]*", "", wellPlate))) + 8*nbPlate
additionnals = additionnals[,-grep(platewell, colnames(additionnals))]
out = data.frame(out, additionnals, nbPlate, wellPlate, xPlate, yPlate)
}
else {
additionnals[,ncol(additionnals)] = gsub(";","", additionnals[,ncol(additionnals)])
out = data.frame(out, additionnals)
}
rownames(out) = names
return(out)
}

119
R/layers.metabarcoding.R Normal file
View File

@@ -0,0 +1,119 @@
#' @include 02_class_metabarcoding.data.R
NULL
#
#
# Managment of layers
#
# Layers a matrix or factors with the same dimension
# than the read matrix
#
# get motus data.frames
#' @export
setGeneric("layer.names", function(obj) {
return(standardGeneric("layer.names"))
})
#' Returns the names of all the layers
#'
#' \code{layer.names} extracts the list of all the layer
#' names attached to a \code{\link{metabarcoding.data}} instance.
#'
#' @param obj a \code{\link{metabarcoding.data}} instance
#' @return a vector of type \code{character} containing the
#' list of all the layer names.
#'
#' @docType methods
#' @rdname layer.names-methods
#' @aliases layer.names-methods,metabarcoding.data
#'
setMethod("layer.names", "metabarcoding.data", function(obj) {
return(names(obj@layers))
})
#' Returns the a layer associated to a \code{\link{metabarcoding.data}}
#'
#' [[ operator Extracts a layer
#' attached to a \code{\link{metabarcoding.data}} instance.
#'
#' @usage \method{[[}{unmutable}(x,i)
#'
#' @param x a \code{\link{metabarcoding.data}} instance
#' @return matrix or a factor.
#'
#' @docType methods
#' @rdname double-open-brace-methods
#' @aliases double-open-brace-methods,metabarcoding.data
#' @method [[
#' @export
#'
setMethod("[[", "metabarcoding.data",
function(x, i, j, ...) {
if (! is.character(i))
stop('Just named index must be used')
if (i=="reads")
return(x@reads)
if (i=="samples")
return(x@samples)
if (i=="motus")
return(x@motus)
if (i=="reads")
return(x@reads)
return(x@layers[[i,exact=TRUE]])
})
#' @method $
#' @export
setMethod("$", "metabarcoding.data",
function(x, name) {
return(x[[name]])
})
# set one data layer data.frames
#' @method [[<-
#' @export
setMethod("[[<-","metabarcoding.data",
function(x, i, j, ...,value) {
if (any(dim(value)!=c(x@scount,x@mcount)))
stop("data dimmension are not coherent with this metabarcoding.data")
if (hasArg('j'))
stop('Just one dimension must be specified')
if (! is.character(i))
stop('Just named index must be used')
if (i=='reads')
stop('you cannot change the reads layer by this way')
if (i=='motus' | i=='samples')
stop('layers cannot be names motus or samples')
value = as.factor.or.matrix(value)
rownames(value)=rownames(x@reads)
colnames(value)=colnames(x@reads)
x@layers[[i]]=value
return(x)
})
#' @method $<-
#' @export
setMethod("$<-","metabarcoding.data",
function(x, name, value) {
x[[name]]=value
return(x)
})

378
R/metabarcoding_threshold.R Normal file
View File

@@ -0,0 +1,378 @@
#' @include 02_class_metabarcoding.data.R
NULL
#' @export
setGeneric("marginalsum", function(data,MARGIN="sample", na.rm = FALSE) {
return(standardGeneric("marginalsum"))
})
#' Computes marginal sums over read counts.
#'
#' Method \code{marginalsum} computes marginal sums over read counts of
#' a \code{\link{metabarcoding.data}} instance.
#'
#' @param data The \code{\linkS4class{metabarcoding.data}} instance
#' on which marginal sums have to be computed.
#' @param MARGIN Indicates if the sums have to be computed across
#' samples or motus.
#' Allowed values are :
#' \itemize{
#' \item{'sample' or 1} for computing sum across samples
#' \item{'motu' or 2} for computing sum across motus
#' }
#' @param na.rm Logical. Should missing values be omitted from the
#' calculations?
#'
#' @return Returns the vector of marginal sums as a \code{numeric} vector
#'
#' @examples
#' # load termite data set from the ROBITools sample data
#' data(termes)
#'
#' # Computes marginal sums per sample
#' ssum = marginalsum(termes,MARGIN="sample")
#'
#' # Computes marginal sums per MOTU
#' msum = marginalsum(termes,MARGIN="motu")
#'
#' @seealso \code{\linkS4class{metabarcoding.data}}
#'
#' @docType methods
#' @rdname marginalsum-methods
#' @aliases marginalsum-methods,metabarcoding.data
#' @author Aurelie Bonin
#'
setMethod("marginalsum", "metabarcoding.data", function(data,MARGIN='sample', na.rm = FALSE) {
if (MARGIN == 'sample')
MARGIN=1
if (MARGIN == 'motu')
MARGIN=2
readcount = reads(data)
if (MARGIN==1)
margesum = rowSums(readcount,na.rm=na.rm)
else
margesum = colSums(readcount,na.rm=na.rm)
return(margesum)
})
rowSums.metabarcoding.data = function (x, na.rm = FALSE, dims = 1L) {
print("coucou")
}
#' @export
setGeneric("normalize", function(data,MARGIN='sample',as.matrix=FALSE) {
return(standardGeneric("normalize"))
})
#' Normalizes read counts by sample or by MOTU.
#'
#' Method \code{normalize} computes a normalized read aboundancy matrix
#' (relative frequency matrix) of a \code{\link{metabarcoding.data}} instance.
#' Normalization can be done according aboundancies per sample or per MOTU.
#'
#' @param data The \code{\linkS4class{metabarcoding.data}} instance
#' on normalisation have to be computed.
#' @param MARGIN Indicates if the sums have to be computed across
#' samples or motus.
#' Allowed values are :
#' \itemize{
#' \item{'sample' or 1} for computing sum across samples
#' \item{'motu' or 2} for computing sum across motus
#' }
#' @param as.matrix Logical indicating if the normalized aboundancies
#' must be returned as a simple \code{matrix} (TRUE) or as a new
#' instance of the \code{\linkS4class{metabarcoding.data}} class
#' (FALSE, the default case).
#'
#' @return Returns a new instance of \code{\linkS4class{metabarcoding.data}}
#' or a \code{numeric} matrix according to the \code{return.as.matrix}
#' parameter.
#'
#' @examples
#' # load termite data set from the ROBITools sample data
#' data(termes)
#'
#' # Computes normalized aboundancies per sample
#' termes.norm = normalize(termes,MARGIN="sample")
#'
#' # Computes normalized aboundancies per sample and
#' # stores the result as a new layer into the thermes
#' # structure
#' termes$normalized = normalize(termes,MARGIN="sample",as.matrix=TRUE)
#'
#' @seealso \code{\linkS4class{metabarcoding.data}}
#'
#' @docType methods
#' @rdname normalize-methods
#' @aliases normalize-methods,metabarcoding.data
#' @author Aurelie Bonin
#'
setMethod("normalize", "metabarcoding.data", function(data,MARGIN="sample",as.matrix=FALSE) {
if (MARGIN == 'sample')
MARGIN=1
if (MARGIN == 'motu')
MARGIN=2
readcount = reads(data)
margesum = marginalsum(data,MARGIN,na.rm=TRUE)
readcount = sweep(readcount,MARGIN,margesum, FUN="/")
if (as.matrix)
newdata=readcount
else
newdata = copy.metabarcoding.data(data,reads=readcount)
return(newdata)
})
#' @export
setGeneric("threshold", function(data,MARGIN="sample",threshold=0.97) {
return(standardGeneric("threshold"))
})
#' Compute the cumulative threshold of read aboundances.
#'
#' The method \code{threshold} of the class \code{\linkS4class{metabarcoding.data}}
#' computes the thresold to be used for conserving just a part of the global
#' signal. This thresold is computed by ranking aboundances by decreasing order.
#' The cululative sums of these ranked abondencies are computed and the aboundance
#' corresponding to the first sum greater than the threshold is returned as result.
#'
#' @param data The \code{\linkS4class{metabarcoding.data}} instance
#' on normalisation have to be computed.
#' @param MARGIN Indicates if the sums have to be computed across
#' samples or motus.
#' Allowed values are :
#' \itemize{
#' \item{'sample' or 1} for computing sum across samples
#' \item{'motu' or 2} for computing sum across motus
#' }
#' @param threshold a numeric value between 0 and 1 indicating which part of
#' the signal must be conserved. Default value is setup to
#' 0.97 (97% of the total signal).
#'
#' @return a numeric vector containing the limit aboundancy to consider for
#' each sample or each MOTU according to the value of the \code{MARGIN}
#' parameter.
#'
#' @examples
#' # load termite data set from the ROBITools sample data
#' data(termes)
#'
#' # computes threshold value to used for keep 95% of
#' # the reads per MOTU
#'
#' t = threshold(termes,MARGIN='motu',threshold=0.95)
#'
#' @seealso \code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}
#'
#' @docType methods
#' @rdname threshold-methods
#' @aliases threshold-methods,metabarcoding.data
#' @author Aurelie Bonin
#'
setMethod("threshold", "metabarcoding.data", function(data,MARGIN="sample",threshold=0.97) {
onethreshold=function(x,threshold) {
s = x[order(-x)]
cs= cumsum(s)
total=cs[length(cs)]
if (total > 0) {
cs= cs / total
cs = cs > threshold
t = s[cs][1]
}
else t=0
return(t)
}
if (MARGIN == 'sample')
MARGIN=1
if (MARGIN == 'motu')
MARGIN=2
readcount = reads(data)
t = apply(readcount,MARGIN,onethreshold,threshold)
return(t)
})
#' @export
setGeneric("threshold.mask", function(data,MARGIN,threshold=0.97,operator='<') {
return(standardGeneric("threshold.mask"))
})
#' Computes a cumulatif thresold mask for filtering read aboundancies.
#'
#' The method \code{threshold.mask} of the class \code{\linkS4class{metabarcoding.data}}
#' computes a logical matrix of the same size than the read matrix of the data parameter.
#' Each cell of this matrix contains a \code{TRUE} or a \code{FALSE} value according to the
#' relationship existing between the read abondancy and the corresponding theshold as computed
#' by the \code{\link{theshold}} method.
#'
#' (computed value) = (read aboundancy) operator (threshold value)
#'
#' for a cell in the result matrix, \code{(read aboundancy)} is extracted from the read layer.
#' \code{operator} is a comparaison operator and \code{(threshold value)} is estimated with the
#' \code{\link{theshold}} method.
#'
#' @param data The \code{\linkS4class{metabarcoding.data}} instance
#' on normalisation have to be computed.
#' @param MARGIN Indicates if the sums have to be computed across
#' samples or motus.
#' Allowed values are :
#' \itemize{
#' \item{'sample' or 1} for computing sum across samples
#' \item{'motu' or 2} for computing sum across motus
#' }
#' @param threshold a numeric value between 0 and 1 indicating which part of
#' the signal must be conserved. Default value is setup to
#' 0.97 (97% of the total signal).
#' @param operator is a logical comparison operator.
#'
#' @return A logical matrix usable for selecting cell in the read aboundancy matrix.
#'
#' @seealso \code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{threshold}}
#'
#' @docType methods
#' @rdname threshold-mask-methods
#' @aliases threshold.mask-methods,metabarcoding.data
#' @author Aurelie Bonin
#'
setMethod("threshold.mask", "metabarcoding.data", function(data,MARGIN,threshold=0.97,operator='<') {
if (MARGIN == 'sample')
MARGIN=1
if (MARGIN == 'motu')
MARGIN=2
readcount = reads(data)
t = threshold(data,MARGIN,threshold)
mask = apply(readcount,c(2,1)[MARGIN],operator,t)
if (MARGIN==2)
mask = t(mask)
return(mask)
})
#' @export
setGeneric("const.threshold.mask", function(data,MARGIN,threshold=0.01,operator='<') {
return(standardGeneric("const.threshold.mask"))
})
#' Computes a constant thresold mask for filtering read aboundancies.
#'
#' The method \code{const.threshold.mask} of the class \code{\linkS4class{metabarcoding.data}}
#' computes a logical matrix of the same size than the read matrix of the data parameter.
#' Each cell of this matrix contains a \code{TRUE} or a \code{FALSE} value according to the
#' relationship existing between the read abondancy and the global theshold.
#'
#' (computed value) = (normalized read aboundancy) operator (threshold value)
#'
#' for a cell in the result matrix, \code{(normalized read aboundancy)} is extracted from the read layer
#' after normalization.
#' \code{operator} is a comparaison operator and \code{(threshold value)} is estimated with the
#' \code{\link{theshold}} method.
#'
#' @param data The \code{\linkS4class{metabarcoding.data}} instance
#' on normalisation have to be computed.
#' @param MARGIN Indicates if the sums have to be computed across
#' samples or motus.
#' Allowed values are :
#' \itemize{
#' \item{'sample' or 1} for computing sum across samples
#' \item{'motu' or 2} for computing sum across motus
#' }
#' @param threshold a numeric value between 0 and 1 indicating which part of
#' the signal must be conserved. Default value is setup to
#' 0.01 (1% of the normalized signal).
#' @param operator is a logical comparison operator.
#'
#' @return A logical matrix usable for selecting cell in the read aboundancy matrix.
#'
#' @seealso \code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{normalize}}
#'
#' @docType methods
#' @rdname const-threshold-mask-methods
#' @aliases const.threshold.mask-methods,metabarcoding.data
#' @author Aurelie Bonin
#'
setMethod("const.threshold.mask", "metabarcoding.data", function(data,MARGIN,threshold=0.01,operator='<') {
if (MARGIN == 'sample')
MARGIN=1
if (MARGIN == 'motu')
MARGIN=2
readcount = normalize(data,MARGIN,as.matrix=TRUE)
mask = do.call(operator,list(readcount,threshold))
return(mask)
})
#' @export
setGeneric("threshold.set", function(data,
MARGIN,
threshold=0.97,
operator='<',
value=0,
normalize=TRUE,
mask.fun=threshold.mask) {
return(standardGeneric("threshold.set"))
})
setMethod("threshold.set", "metabarcoding.data", function(data,
MARGIN,
threshold=0.97,
operator='<',
value=0,
normalize=TRUE,
mask.fun=threshold.mask) {
if (MARGIN == 'sample')
MARGIN=1
if (MARGIN == 'motu')
MARGIN=2
readcount = reads(data)
if (normalize)
data = normalize(data,c(2,1)[MARGIN])
mask = mask.fun(data,MARGIN,threshold,operator)
readcount[mask] = value
newdata = copy.metabarcoding.data(data,reads=readcount)
return(newdata)
})

407
R/mstat.R Normal file
View File

@@ -0,0 +1,407 @@
#' @include 02_class_metabarcoding.data.R
#' @import igraph
NULL
require(igraph)
# pos = expand.grid(x,y)
#' Computes the pairwise distance matrix as a data.frame where
#'
#' @param x a vector for the X coordinates
#' @param y a vector for the Y coordinates
#' @param labels a vector with the sample names
#'
#' @return a data.frame instance of three columns
#' - a : The label of the first sample
#' - b : The label of the second sample
#' - dist : The euclidian distance beween sample a and b
#'
#' @examples
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' pos = expand.grid(1:3 * 10,1:7 * 10)
#' labels = rownames(termes.ok)
#' d = dist.grid(pos[,1],pos[2],labels)
#'
#' @export
dist.grid = function(x,y,labels=NULL){
pos = data.frame(x,y)
if (is.null(labels))
labels = as.character(interaction(pos))
else
labels = as.character(labels)
llabels=length(labels)
dpos=dist(pos)
a = rep(labels[1:(llabels-1)],(llabels-1):1)
b = do.call(c,(lapply(2:llabels, function(i) labels[i:llabels])))
return(data.frame(a,b,dist=as.vector(dpos)))
}
#' Builds the list of sample groups included in a circle around a central sample
#'
#' @param dtable a distance table between samples as
#' computed by \code{\link{dist.grid}}
#' @param radius the radius of the circle
#' @param center a \code{logical} value indicating if the center of
#' the group must be included in the group
#'
#' @return a list of vectors containing the labels of the group members
#'
#' @examples
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' pos = expand.grid(1:3 * 10,1:7 * 10)
#' labels = rownames(termes.ok)
#' d = dist.grid(pos[,1],pos[2],labels)
#' groups = dist.center.group(d,20)
#'
#' @export
dist.center.group=function(dtable,radius,center=TRUE) {
fgroup = function(c) {
ig = dtable[(dtable[,1]==c | dtable[,2]==c) & dtable[,3] <= radius,]
return(union(ig[,1],ig[,2]))
}
pos = as.character(union(dtable[,1],dtable[,2]))
g = lapply(pos,fgroup)
names(g) = pos
if (!center)
g = mapply(setdiff,g,pos)
return(g)
}
#' Builds the list of sample groups including samples closest than a define distance
#'
#' A graph is build by applying the threshold \code{dmax} to the distance matrix
#' A group is a clique max in this graph. Consequently all member pairs of a group
#' are distant by less or equal to \code{dmax}.
#'
#' @param dtable a distance table between samples as
#' computed by \code{\link{dist.grid}}
#' @param dmax the maximum distance between two samples
#'
#' @return a list of vectors containing the labels of the group members
#'
#' @examples
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' pos = expand.grid(1:3 * 10,1:7 * 10)
#' labels = rownames(termes.ok)
#' d = dist.grid(pos[,1],pos[2],labels)
#' groups = dist.clique.group(d,20)
#'
#' @export
dist.clique.group=function(dtable,dmax,center=True) {
gp = igraph::graph.edgelist(as.matrix(dtable[dtable$dist <= dmax,c('a','b')]),directed=FALSE)
g = igraph::maximal.cliques(gp)
return(lapply(g, function(i) igraph::V(gp)$name[i]))
}
#' Computes the univariate M statistics
#'
#' @param w the weigth matrix indicating the presence probability of each motu
#' in each samples. Each line corresponds to a sample and each column
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
#' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
#'
#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
#' function
#'
#' @seealso \code{\link{dist.center.group}}
#' @seealso \code{\link{m.weight}}
#'
#' @examples
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' pos = expand.grid(1:3 * 10,1:7 * 10)
#' labels = rownames(termes.ok)
#' d = dist.grid(pos[,1],pos[2],labels)
#' groups = dist.center.group(d,20)
#' w = m.weight(termes.ok)
#' m = m.univariate(w,groups)
#'
#' @references Marcon, E., Puech, F., and Traissac, S. (2012).
#' Characterizing the relative spatial structure of point patterns.
#' International Journal of Ecology, 2012.
#'
#' @export
m.univariate = function(w,groups) {
nunivar = function(members,center) {
g = w[members,]
wn = colSums(g)
wa = sum(wn)
wn = wn - center
wa = wa - center
p = wn / wa * center
return(p)
}
centers = lapply(names(groups),function(x) w[x,])
Wf = colSums(w)
Wa = sum(Wf)
Denom.univar = colSums(w * (sweep(-w,2,Wf,'+') / (Wa - w)))
Num.univar = rowSums(mapply(nunivar,groups,centers))
Munivar=Num.univar/Denom.univar
Munivar[Denom.univar==0]=0
return(Munivar)
}
#' Computes the bivariate M statistics
#'
#' The function computes the bivariate M statiscics for a set of target species around a set of
#' focus species.
#'
#' @param w1 the weigth matrix indicating the presence probability of each motu
#' used as focus species in each samples. Each line corresponds to a sample and each column
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
#' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
#'
#' @param w2 the weigth matrix indicating the presence probability of each motu
#' used as target species in each samples. Each line corresponds to a sample and each column
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
#' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
#' if \code{w2} is not set, w1 is also used as target species. in this case the diagonal
#' of the matrix return contains the univariate M statistic for the diferent species.
#'
#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
#' function
#'
#' @return a matrix of M bivariate statistics with one focus species by row and one target species
#' by columns If \code{w2} is not specified the diagonal of the matrix is equal to the univariate
#' M statistic of the corresponding species.
#'
#' @seealso \code{\link{dist.center.group}}
#' @seealso \code{\link{m.weight}}
#'
#' @examples
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' pos = expand.grid(1:3 * 10,1:7 * 10)
#' labels = rownames(termes.ok)
#' d = dist.grid(pos[,1],pos[2],labels)
#' groups = dist.center.group(d,20)
#' w = m.weight(termes.ok)
#' m = m.bivariate(w,groups)
#'
#' @references Marcon, E., Puech, F., and Traissac, S. (2012).
#' Characterizing the relative spatial structure of point patterns.
#' International Journal of Ecology, 2012.
#'
#' @export
m.bivariate = function(w1,w2=NULL,groups) {
nunbivar = function(members,center) {
g = w2[members,]
wn = colSums(g)
wa = sum(wn)
if (self){
mwn = wn %*% t(rep(1,length(wn)))
diag(mwn)= wn - center
wa = wa - center
wna = mwn/wa
p = sweep(wna,2,center,'*')
#p = center %*% wna
}
else {
wna= matrix(wn/wa,nrow=1)
p = center %*% wna
}
return(p)
}
if (is.null(w2)){
self = TRUE
w2=w1
}
else {
self = FALSE
}
centers = lapply(names(groups),function(x) w[x,])
Wf = colSums(w1)
Wn = colSums(w2)
Wa = sum(Wn)
if (self){
Wn = sweep(-w1,2,Wn,'+')
Wna = Wn/(Wa - w1)
Denom.bivar = t(w1) %*% Wna
}
else {
Wna= t(Wn/Wa)
Denom.bivar = Wf %*% Wna
}
Num.bivar = matrix(0,nrow=ncol(w1),ncol=ncol(w2))
ng = length(groups)
for (i in 1:ng) {
Num.bivar = Num.bivar + nunbivar(groups[[i]],centers[[i]])
}
Mbivar=Num.bivar/Denom.bivar
Mbivar[Denom.bivar==0]=0
return(Mbivar)
}
#' Computes a weigth matrix from a \code{\linkS4class{metabarcoding.data}}
#'
#' The weight can be considered as a propability of presence of a MOTU in a
#' given sample. This function defines this probability as the fraction of
#' the maximal occurrence frequency over all samples.
#'
#' @param data a \code{\linkS4class{metabarcoding.data}} instance
#'
#' @return a weight matrix usable for M statistics
#'
#' @examples
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' w = m.weight(termes.ok)
#'
#' @export
m.weight = function(data) {
ndata = normalize(data,MARGIN='sample')
fmax=apply(ndata$reads,2,max)
w = sweep(ndata$reads,2,fmax,'/')
rownames(w)=rownames(ndata)
colnames(w)=colnames(ndata)
return(w)
}
#' Simulate null distribion of the M statistics by Monte-Carlo
#'
#' Computes the null empirical distribution of the M statistics
#' by shuffling MOTUs among location.
#'
#' @param w the weigth matrix indicating the presence probability of each motu
#' in each samples. Each line corresponds to a sample and each column
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
#' names.
#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
#' function
#' @param resampling the number of simulation to establish the null distribution
#'
#' @return a matrix of M score under the null hypothesis of random distribution of MOTUs
#' with a MOTUs per line and a culumn per simulation
#'
#' @examples
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' pos = expand.grid(1:3 * 10,1:7 * 10)
#' labels = rownames(termes.ok)
#' d = dist.grid(pos[,1],pos[2],labels)
#' groups = dist.center.group(d,20)
#' w = m.weight(termes.ok)
#' dnull = dm.univariate(w,groups)
#'
#' @export
dm.univariate = function(w,groups,resampling=100) {
shuffle = function(w){
wr =apply(w,2,function(y) sample(y,length(y),replace=FALSE))
rownames(wr)=rownames(w)
return(wr)
}
msim = function(x) {
return(m.univariate(shuffle(w),groups))
}
dnull = mapply(msim,1:resampling)
rownames(dnull) = colnames(w)
return(dnull)
}
#' Test the significance of the M statistics by Monte-Carlo
#'
#' Computes computes the p.value the M statistics asociated to a MOTU
#' by shuffling MOTUs among location.
#'
#' @param w the weigth matrix indicating the presence probability of each motu
#' in each samples. Each line corresponds to a sample and each column
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
#' names.
#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
#' function
#' @param resampling the number of simulation to establish the null distribution
#'
#' @param alternative a character value in \code{c('two.sided','less','greater')}
#' - two.sided : the m stat is check against the two side of the empirical
#' M distribution
#' - less : test if the M stat is lesser than the M observed in the the empirical
#' M distribution (exlusion hypothesis)
#' - greater : test if the M stat is greater than the M observed in the the empirical
#' M distribution (aggregation hypothesis)
#'
#' @return a vector of p.value with an attribute \code{m.stat} containing the actual M stat
#' for each MOTUs
#'
#' @examples
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' pos = expand.grid(1:3 * 10,1:7 * 10)
#' labels = rownames(termes.ok)
#' d = dist.grid(pos[,1],pos[2],labels)
#' groups = dist.center.group(d,20)
#' w = m.weight(termes.ok)
#' pval = m.univariate.test(w,groups)
#'
#' @export
m.univariate.test = function(w,groups,resampling=100,alternative='two.sided') {
dnull = dm.univariate(w,groups,resampling)
m = m.univariate(w,groups)
pnull = sapply(1:dim(dnull)[1],function(y) 1 - ecdf(dnull[y,])(m[y]))
p.value=NULL
if (alternative=='two.sided') {
p.value = mapply(min,pnull,1 - pnull)
}
if (alternative=='less') {
p.value = pnull
}
if (alternative=='greater') {
p.value = 1 - pnull
}
# Set p.value to 1 if the MOTU occurres in only one place
n = colSums(w > 0)
p.value[n==1]=1
names(p.value) = colnames(w)
attr(p.value,'m.stat')=m
return(p.value)
}

118
R/obiclean.R Normal file
View File

@@ -0,0 +1,118 @@
#' @include 02_class_metabarcoding.data.R
NULL
# TODO: Add comment
#
# Author: coissac
###############################################################################
#' @export
setGeneric("extracts.obiclean", function(obj) {
return(standardGeneric("extracts.obiclean"))
})
#' Extracts the obiclean results
#'
#' The method \code{extracts.obiclean} of the class \code{\linkS4class{metabarcoding.data}}
#' extracts \code{obiclean} results from the MOTUs descriptions include in the
#' \code{\linkS4class{metabarcoding.data}} instance.
#' When an \code{obitab} file is imported using the \code{\link{import.metabarcoding.data}}
#' if \code{obiclean} results are present in the file they are stored in the
#' \code{motu} data.frame. By calling this methods, MOTU descriptors describing
#' the \code{obiclean} status are moved to a set of layers.
#'
#' @param obj the \code{\linkS4class{metabarcoding.data}} to analyze
#'
#' @return the modified \code{\linkS4class{metabarcoding.data}} instance
#'
#' @examples
#'
#' # load termite data set from the ROBITools sample data
#' data(termes)
#'
#' # shows the initial list of layer names
#' layer.names(t)
#'
#' # extracts the obiclean status
#' termes = extracts.obiclean(termes)
#'
#' # shows the name of the newly created layers
#' layer.names(t)
#'
#'
#'
#' @seealso \code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{normalize}}
#'
#' @docType methods
#' @rdname extracts-obiclean-methods
#' @aliases extracts.obiclean-methods,metabarcoding.data
#' @author Eric Coissac
#'
setMethod("extracts.obiclean", "metabarcoding.data", function(obj) {
pat = "^obiclean_status:.*$"
cols = colnames(obj@motus)
cleancols = grep(pat,cols)
clean.names=cols[cleancols]
p = grep(pat,cols)
d = t(as.factor.or.matrix(obj@motus[,p]))
n = sapply(strsplit(cols[p],':'),function(y) y[[2]])
rownames(d)=n
d = d[rownames(obj@reads),]
obj[["obiclean_status"]]=d
newmotus = obj@motus[-cleancols]
pat = "^obiclean_count:.*$"
cols = colnames(newmotus)
cleancols = grep(pat,cols)
clean.names=cols[cleancols]
p = grep(pat,cols)
d = t(as.factor.or.matrix(newmotus[,p]))
n = sapply(strsplit(cols[p],':'),function(y) y[[2]])
rownames(d)=n
d = d[rownames(obj@reads),]
obj[["obiclean_count"]]=d
newmotus = newmotus[-cleancols]
pat = "^obiclean_cluster:.*$"
cols = colnames(newmotus)
cleancols = grep(pat,cols)
clean.names=cols[cleancols]
p = grep(pat,cols)
d = t(as.factor.or.matrix(newmotus[,p]))
n = sapply(strsplit(cols[p],':'),function(y) y[[2]])
rownames(d)=n
d = d[rownames(obj@reads),]
obj[["obiclean_cluster"]]=d
newmotus = newmotus[-cleancols]
newdata = copy.metabarcoding.data(obj,motus=newmotus)
return(newdata)
})
#' @export
setGeneric("extracts.obiclean_cluster", function(obj) {
return(standardGeneric("extracts.obiclean_cluster"))
})
setMethod("extracts.obiclean_cluster", "metabarcoding.data", function(obj) {
obiclean = extracts.obiclean(obj)
obihead = obiclean[,! is.na(obiclean$motus$obiclean_head)]
obihead$obiclean_count[is.na(obihead$obiclean_count)]=0
reads = obihead$obiclean_count
l = obihead@layers[layer.names(obihead) != "obiclean_count"]
newdata = copy.metabarcoding.data(obihead,reads=reads,layers=l)
return(newdata)
}
)

0
R/pcrslayer.R Normal file
View File

84
R/plot.PCRplate.R Normal file
View File

@@ -0,0 +1,84 @@
#' @include 02_class_metabarcoding.data.R
NULL
#' Plot PCR plates
#'
#' Plots samples localization in PCR plates, and points out problematic samples if provided.
#'
#' @param x a \code{\link{metabarcoding.data}} object
#' @param samples a character vector containing names of problematic samples. Default is \code{NULL}
#' @param different a boolean indicating whether different tags where used in forward and reverse to identify samples. Default is \code{TRUE}
#' @param ... arguments ot be passed to methods, such as graphical parameters
#'
#' @return \code{\link{plot.PCRplate}} returns a plot displaying no more than 4 PCR plates, with problematic sample localization
#'
#' @examples
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
#'
#' data(termes)
#'
#' # reading the termes_ngsfilt.txt file
#' termes.ngs=import.ngsfilter.data('termes_ngsfilt.txt', platewell="position")
#'
#' # including ngsfilter data into termes data
#' attr(termes, "samples") = termes.ngs[rownames(termes),]
#'
#' #plot PCR plate plan
#' col = rep("green", nrow(termes))
#' col[grep("r", rownames(termes))] = "red"
#' plot.PCRplate(termes, col=col)
#'
#' #highlighting location of samples with low identification score
#'
#' #low quality taxonomic assignements identification
#' library(plotrix)
#' weighted.hist(termes$motus$best_identity, colSums(termes$reads), breaks = 20, ylab = "Nb reads", xlab = "Ecotag scores", xaxis=F)
#' axis(1, labels = T)
#' lowqual.seq = rownames(termes$motus)[termes$motus$best_identity < 0.7]
#'
#' #identification and localization (in PCR plate) of samples with high proportions of low quality taxonomic assignements
#' termes.freq= normalize(termes, MARGIN=1)$reads
#' hist(log10(rowSums(termes.freq[,lowqual.seq]) + 1e-05), breaks = 20, xlab = "Prop low quality reads")
#' lowqual.sample = rownames(termes)[log10(rowSums(termes.freq[, lowqual.seq]) + 1e-05) > -0.5]
#'
#' plot.PCRplate(termes, lowqual.sample, col=col)
#'
#' @seealso \code{\link{import.metabarcoding.data}}
#'
#' @author Lucie Zinger
#' @keywords DNA metabarcoding
#' @export
#'
plot.PCRplate = function(x, samples=NULL, col="cyan2", different=T, ...) {
if(length(grep("xPlate", colnames(x$samples)))==0 |
length(grep("yPlate", colnames(x$samples)))==0) {
stop("samples/controls position in PCR plates (xPlate and yPlate) are not defined")
}
if(length(grep("tagF", colnames(x$samples)))==0 |
length(grep("tagR", colnames(x$samples)))==0) {
stop("tags (tagF and tagR) are not defined")
}
nplate = max(x$samples$nbPlate)
if(nplate>4) {
stop("Cannot plot more than 4 plates")
}
plot(x$samples$xPlate, -x$samples$yPlate, pch=19, xaxt="n", yaxt="n", col=col,
xlim=c(-5,17), ylab="y plate", xlab= "x plate", ylim=c(-4.5*8-5,0), ...)
if(different==T) {
text(-3, -unique(x$samples$yPlate[order(x$samples$yPlate)]), unique(x$samples$tagF[order(x$samples$yPlate)]), cex=0.5)
text(unique(x$samples$xPlate[order(x$samples$xPlate)]), -5, unique(x$samples$tagR[order(x$samples$xPlate)]), cex=0.5, srt=90)
}
abline(h=-seq(8.5,8*nplate+0.5,8), lty=2, col="grey")
segments(c(0,13), rep(min(-x$samples$yPlate),2), c(0,13), c(0,0), lty=2, col="grey")
#plot problematic samples
if(!is.null(samples)) {
points(x$samples[samples,"xPlate"], -x$samples[samples,"yPlate"], pch="x")
}
}

105
R/plot.seqinsample.R Normal file
View File

@@ -0,0 +1,105 @@
#' @include 02_class_metabarcoding.data.R
NULL
#' Plot sequence abundance in samples
#'
#' Plots relative abundances of a set of sequences in all samples (log10 transformed)
#'
#'
#' @param x a \code{\link{metabarcoding.data}} object
#' @param seqset a vetcor with sequences names
#' @param seqtype a string indicating what type of sequences are displayed
#' @param controls a vector indicating the negative controls names in the x object.
#' Default is \code{NULL}
#'
#' @return returns a plot with the log10 transformed relative porportion of
#' selected MOTUs in each samples. If the number of samples is > 96,
#' then the plot is displayed in 4 panels
#'
#' @examples
#'
#' data(termes)
#'
#' seqset = rownames(termes$motus)[which(termes$motus$genus_name=="Anoplotermes")]
#' plot.seqinsample(termes, seqset, "Anoplotermes")
#'
#' controls = rownames(termes)[grep("r", rownames(termes))]
#' seqset = rownames(termes$motus)[which(termes$motus$best_identity<0.7)]
#' plot.seqinsample(termes, seqset, "Not assigned", controls)
#'
#' @seealso \code{\linkS4class{taxonomy.obitools}}, and method \code{\link{taxonmicank}}
#'
#' @author Lucie Zinger
#' @keywords metabarcoding
#'
#' @export
#'
plot.seqinsample = function(x, seqset, seqtype, controls=NULL){
require(vegan)
x.freq = vegan::decostand(x$reads,"total",1)
if(!is.null(controls)){
controls.ind = match(controls, rownames(x.freq))
}
if(nrow(x.freq)>96){
x.freq.parse = seq(0,round(nrow(x$samples), digit=0),
round(nrow(x$samples)/4, digit=0))
layout(matrix(c(1,2,3,1,4,5),3,2), height=c(0.3,1,1))
par(oma=c(1,1,1,0), mar=c(3,3,1,1))
#legend
breaks = seq(log10(1e-4),log10(1), length.out=100)
plot(breaks, rep(1,100), col=topo.colors(100), pch=15, cex=2, ylim=c(0,1.5),
xaxt="n", yaxt="n", bty='n')
text(breaks[seq(1,100,10)], rep(0.7,length(seq(1,100,10))),
round(10^breaks[seq(1,100,10)],4))
mtext("Seqence frequencies:", side=3, line=0, cex=0.8)
#plot
for(i in 1:(length(x.freq.parse)-1)) {
range = (x.freq.parse[i]+1):(x.freq.parse[i]+round(nrow(x$samples)/4, digit=0))
mat = x.freq[range,seqset]
image(log10(mat),col = topo.colors(100), xaxt="n", yaxt="n", breaks=c(breaks,0))
if(!is.null(controls)){
if(length(na.omit(match(controls.ind, range)))!=0){
abline(v=seq(0,1,l=round(nrow(x$samples)/4, digit=0))[match(controls.ind, range)],col="red", lty=3)
}}
axis(side=1,at=seq(0,1,l=round(nrow(x$samples)/4,digit=0)),
labels=rownames(x$samples)[range],
las=2, cex.axis=0.3)
}
mtext(side=2, paste(seqtype, "n = ", length(seqset)), outer=T, cex=0.7, font=3)
mtext(side=1, "Samples", cex=0.7, outer=T)
} else {
layout(matrix(c(1,2,1,2),2,2), height=c(0.3,1))
par(oma=c(1,1,1,0), mar=c(3,3,1,1))
#legend
breaks = seq(log10(1e-4),log10(1), length.out=100)
plot(breaks, rep(1,100), col=topo.colors(100), pch=15, cex=2, ylim=c(0,1.5),
xaxt="n", yaxt="n", bty='n')
text(breaks[seq(1,100,10)], rep(0.7,length(seq(1,100,10))),
round(10^breaks[seq(1,100,10)],4))
mtext("Seqence frequencies:", side=3, line=0, cex=0.8)
image(log10(x.freq[,seqset]),col = topo.colors(100), xaxt="n", yaxt="n", breaks=c(breaks,0))
if(!is.null(controls)){
abline(v=seq(0,1,l=round(nrow(x$samples), digit=0))[controls.ind],col="red", lty=3)
}
axis(side=1,at=seq(0,1,l=round(nrow(x$samples),digit=0)),
labels=rownames(x$samples),
las=2, cex.axis=0.3)
mtext(side=2, paste(seqtype, "n = ", length(seqset)), outer=T, cex=0.7, font=3)
mtext(side=1, "Samples", cex=0.7, outer=T)
}
}

99
R/rarefy.R Normal file
View File

@@ -0,0 +1,99 @@
#' @include 02_class_metabarcoding.data.R
NULL
# TODO: Add comment
#
# Author: coissac
###############################################################################
#' @export
setGeneric("rarefy", function(x,n,first.pass=0.95,pseudo.count=0,...) {
return(standardGeneric("rarefy"))
})
setMethod("rarefy", "ANY", function(x,n,first.pass=0.95,pseudo.count=0,sum=NA) {
if (is.na(sum))
sum=sum(x)
if (sum < sum(x))
stop("sum parameter must be greater or equal to sum(x)")
grey = sum-sum(x)
probs = x + pseudo.count
if (grey > 0)
probs = c(probs,grey)
# Just to ensure at least one execution of the loop
n1 = n * 2
while(n1 > n)
n1 = rpois(1,n * first.pass)
rep1 = as.vector(rmultinom(1,n1,probs))
n2 = sum(rep1)
levels = 1:length(probs)
rep2= as.vector(table(factor(sample(levels,
n - n2,
replace=TRUE,
prob = probs),
levels=levels)))
rep1 = (rep1 + rep2)
if (grey > 0)
rep1 = rep1[-length(rep1)]
return(rep1)
})
setMethod("rarefy", "metabarcoding.data", function(x,n,first.pass=0.95,pseudo.count=0,MARGIN='sample') {
if (MARGIN == 'sample')
MARGIN=1
if (MARGIN == 'motu')
MARGIN=2
dreads= dim(x@reads)
rreads= matrix(0,nrow = dreads[1] , ncol = dreads[2])
if (MARGIN == 1)
for (i in 1:dreads[1]) {
rreads[i,]=rarefy(x@reads[i,],
n=n,
first.pass=first.pass,
pseudo.count=pseudo.count)
}
# rreads = t(apply(reads,1,rarefy,n=n,
# first.pass=first.pass,
# pseudo.count=pseudo.count))
else
for (i in 1:dreads[2]) {
rreads[,i]=rarefy(x@reads[,i],
n=n,
first.pass=first.pass,
pseudo.count=pseudo.count)
}
# rreads = as.matrix(apply(reads,2,rarefy,n=n,
# first.pass=first.pass,
# pseudo.count=pseudo.count))
rreads=as.matrix(rreads)
rownames(rreads) = rownames(x@reads)
colnames(rreads) = colnames(x@reads)
newdata = copy.metabarcoding.data(x,reads=rreads)
return(newdata)
})

56
R/read.ngsfilter.R Normal file
View File

@@ -0,0 +1,56 @@
#' @include 02_class_metabarcoding.data.R
NULL
#' Read an OBITools ngsfilter file
#'
#' Reads a ngsfilter file as formatted for the OBITools. For now, needs to be tab delimited till the "F" column.
#' Any additionnal information needs to be space delimited.
#'
#' @seealso \code{\link{import.metabarcoding.data}}
#' @author Lucie Zinger
#' @keywords data import
#' @export
#'
read.ngsfilter <- function(filename, decimal='.', as.is=!stringsAsFactors, stringsAsFactors = default.stringsAsFactors()) {
t<-read.table(file=filename, header=F, sep="\t", as.is=T)
beg <- t[,1:5]
colnames(beg) <- c('experiment','sample','tags','forward_primer','reverse_primer')
if (length(unique(beg$sample))==nrow(beg))
rownames(beg) <- beg$sample
end <- t[,c(2,6)]
#F <- unlist(lapply(end$V6, function(x) strsplit(x,"@")[[1]][1]))
rawextras <- unlist(lapply(end$V6, function(x) strsplit(x,"@")[[1]][2]))
rawextras <- lapply(rawextras, function(s) strsplit(s, '; ')[[1]])
rawextras <- lapply(rawextras, function(l) unlist(lapply(l, function(s) sub("^ +","",s))))
rawextras <- lapply(rawextras, function(l) unlist(lapply(l, function(s) sub(" +$","",s))))
rawextras <- lapply(rawextras, function(l) unlist(lapply(l, function(s) strsplit(s,"="))))
columnnames <- unique(unlist(lapply(rawextras, function(l) l[seq(1,length(l),2)])))
m <- matrix(nrow=nrow(end), ncol=length(columnnames))
colnames(m) <- columnnames
m <- as.data.frame(m)
#print(head(rawextras))
tt <- lapply(rawextras, function(l) list(l[seq(1,length(l),2)],l[seq(2,length(l),2)]))
invisible(lapply(1:length(tt), function(i){m[i,tt[[i]][[1]]] <<- tt[[i]][[2]]}))
invisible(lapply(colnames(m), function(n) m[,n] <<- type.convert(m[,n], dec=decimal, as.is=as.is)))
ngs = cbind(beg, m)
rownames(ngs) = ngs$sample
class(ngs)<-c('ngsfilter.data',class(ngs))
return(ngs)
}

39
R/read.obitab.R Normal file
View File

@@ -0,0 +1,39 @@
#' @include 02_class_metabarcoding.data.R
NULL
#' Reads a data file produced by the obitab command
#'
#' Read a data file issued from the convertion of a fasta
#' file to a tabular file by the obitab command
#'
#' @param file a string containing the file name of the obitab file.
#' @param sep Column separator in the obitab file.
#' The default separator is the tabulation.
#'
#' @return a \code{data.frame} instance containing the obitab file
#'
#' @examples
#' require(ROBITools)
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
#'
#' # read the termes.tab file
#' termes=read.obitab('termes.tab')
#'
#' # print the dimensions of the data.frame
#' dim(termes)
#'
#' @seealso \code{\link{import.metabarcoding.data}}
#' @author Eric Coissac
#' @export
#'
read.obitab <-
function(filename,sep='\t') {
data=read.delim(filename,sep=sep,strip.white=T,check.names =F)
data
}

17
R/read.sumatra.R Normal file
View File

@@ -0,0 +1,17 @@
#' @include 02_class_metabarcoding.data.R
NULL
# TODO: Add comment
#
# Author: coissac
###############################################################################
read.sumatra = function(filename) {
data = read.table(filename,sep="\t",header=FALSE)
score = data[,3]
name.first = mapply(min,as.character(s[,1]),as.character(s[,2]))
name.second= mapply(max,as.character(s[,1]),as.character(s[,2]))
sname = as.character(interaction(data[,1],data[,2]))
}

123
R/s3objects.R Normal file
View File

@@ -0,0 +1,123 @@
# TODO: Add comment
#
# Author: coissac
###############################################################################
#' Adds a class into the class hierarchie attribute.
#'
#' \code{addS3Class} adds a new class name to the vector
#' of class associated to the object. This the way to
#' assign an object to an S3 class. \code{addS3Class} add
#' the new class name in front of the class vector
#'
#' @param object the object to modify
#' @param classname the name of the new class
#'
#' @return the object given as parametter casted to the new
#' class
#'
#' @examples
#' x = c(1,3,2,5)
#' x = addS3Class(x,"my.vector")
#' class(x)
#'
#' @seealso \code{\link{rmS3Class}}
#'
#' @note for efficiency purpose no check is done on the input
#' parametters
#'
#' @keywords system function
#'
#' @author Eric Coissac
#' @export
#'
addS3Class = function(object,classname) {
class(object) = c(classname,class(object))
return(object)
}
#' Removes a class from the class hierarchie attribute.
#'
#' \code{rmS3Class} removes a class name from the vector
#' of class associated to the object. This the way to
#' remove the association between an object and a S3 class.
#'
#' @param object the object to modify
#' @param classname the name of the class to remove
#'
#' @return the object given as parametter.
#'
#' @examples
#' x = c(1,3,2,5)
#' x = addS3Class(x,"my.vector")
#' class(x)
#' x = rmS3Class(x,"my.vector")
#' class(x)
#'
#' @seealso \code{\link{addS3Class}}
#'
#' @note for efficiency purpose no check is done on the input
#' parametters
#'
#' @keywords system function
#'
#' @author Eric Coissac
#' @export
#'
rmS3Class = function(object,classname) {
c = class(object)
if (! is.null(c))
index = match(classname,c)
class(object)=c[-index]
return(object)
}
#' create basic functions to manipulate a new S3 class
#'
#' createS3Class function create in the \code{package:ROBITools}
#' environment an \code{is.xxx} function and an \code{as.xxx} function
#' allowing to test if an abject belong the class \code{xxx} and to add
#' the class \code{xxx} to the class list of an object. \code{xxx} is a
#' generic class name that is specified through the \code{classname}
#' argument of the function.
#'
#' @param classname a \code{character string} indicating the name
#' of the new class.
#'
#' @examples
#'
#' # Create a new S3 class named mynewclass
#' createS3Class('mynewclass')
#'
#' #create a new vector object
#' x=c(1,4,6)
#'
#' # test if it belongs the new class, that is false
#' is.mynewclass(x)
#'
#' # Associate x to the new class
#' as.mynewclass(x)
#'
#' # test again if x belongs the new class, that is now true
#' is.mynewclass(x)
#'
#' @seealso \code{\link{rmS3Class}}
#'
#' @note Take care that the new functions are created in the
#' \code{package:ROBITools} environment.
#'
#' @keywords system function
#'
#' @author Eric Coissac
#' @export
#'
createS3Class = function(classname) {
is.class = function(object) any(class(object)==classname)
as.class = function(object) return(addS3Class(object,classname))
assign(paste('is',classname,sep="."),is.class,envir=globalenv())
assign(paste('as',classname,sep="."),as.class,envir=globalenv())
}

89
R/taxoDBtree.R Normal file
View File

@@ -0,0 +1,89 @@
#'@include 02_class_metabarcoding.data.R
#'@import ROBITaxonomy
NULL
#' Construct a taxonomic tree from a list of taxa
#'
#' Construct a graph from a table containing the taxonomic path of sequences
#'
#'
#' @param x a table containing the taxonomic path of the references. Typically an output from get.classic.taxonomy
#'
#' @return g a directed graph displaying the taxonomy hierarchy of the input data. Stored in a \code{\link{igraph}} object
#' where the taxonomic ranks of the vertices are added to the vertices attributes
#'
#' @examples
#'
#' data(termes)
#'
#' taxo=default.taxonomy()
#'
#' termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid")
#' head(termes.taxo.table)
#'
#' graph.tax.termes = dbtree(termes.taxo.table[,1:7])
#' library(igraph)
#'
#' #plot the tree
#' coord = layout.reingold.tilford(graph.tax.termes, root=1, circular=F)
#' v.cex = as.factor(V(graph.tax.termes)$rank)
#' levels(v.cex) = match(levels(v.cex), colnames(termes.taxo.table))
#' plot(graph.tax.termes, vertex.size=1, vertex.label.cex=2*(as.numeric(as.vector(v.cex))^-1), edge.arrow.size=0, layout=coord)
#'
#'
#' #Vizualization with sequence counts
#' tax.count = log10(colSums(termes$reads)[match(as.vector(V(graph.tax.termes)$name), termes$motus$scientific_name)])
#' tax.count[is.na(tax.count)|tax.count<0] = 0.01
#' V(graph.tax.termes)$count = unname(tax.count)
#'
#' plot(graph.tax.termes, vertex.size=V(graph.tax.termes)$count, vertex.label.cex=2*(as.numeric(as.vector(v.cex))^-1), edge.arrow.size=0, layout=coord)
#'
#'
#' @seealso \code{\link{get.classic.taxonomy}}
#' @author Lucie Zinger
#' @export
dbtree = function(x) {
#dealing with noranks
x2 = x
for (i in 1:ncol(x2)) {
x2[,i] = as.character(x[,i])
if(length(which(is.na(x[,i])==T))!=0) {
if(i==1) {
x2[which(is.na(x[,i])==T),i] = "NR"
} else {
x2[which(is.na(x[,i])==T),i] = as.character(x2[,i-1])[which(is.na(x2[,i])==T)]
}
}
}
#prepare an edgelist
edgelist = list()
for (i in 1:(ncol(x2)-1)){
out = x2[,c(i,i+1)]
out2 = out[-which(duplicated(out)==T),]
colnames(out2) = c("parent", "kid")
edgelist[[i]] = out2[which(out2[,1]!=out2[,2]),]
}
edgelist = do.call("rbind", edgelist)
#construct the graph
g = igraph::graph.edgelist(as.matrix(edgelist), directed=T)
#get taxorank for each taxa
ranks = do.call("rbind", lapply(1:ncol(x), function(y) {
out = cbind(unique(as.character(x[,y])), colnames(x)[y])
out
}))
#Assign nodes to taxorank
igraph::V(g)$rank = ranks[match(igraph::V(g)$name, ranks[,1]),2]
return(g)
}

74
R/taxonomic.resolution.R Normal file
View File

@@ -0,0 +1,74 @@
#' @import ROBITaxonomy
#' @include 02_class_metabarcoding.data.R
NULL
#' Dataset taxonomic resolution summary.
#'
#' Summarizes the taxonomic relution of reads and MOTUs over the entire dataset
#'
#'
#' @param x a \code{\link{metabarcoding.data}} object
#' @param colranks a string indicating column name where ranks are stored in \code{x}
#' @param colscores a string indicating column name where taxonomic identification scores are stored in \code{x}
#' @param thresh a threshold for defining at which taxonomic identification scores a sequence can be considered as "not assigned".
#' Default is \code{0.7}
#'
#' @return returns a data.frame and piecharts of the number/proportion of MOTUs/reads assigned to each taxonomic levels
#'
#' @examples
#'
#' data(termes)
#' taxo=default.taxonomy()
#'
#' termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid")
#' attr(termes, "motus") = data.frame(termes$motus, termes.taxo.table)
#' attr(termes, "motus")["count"] = colSums(termes$reads)
#'
#' summary.taxores(termes, "taxonomic_rank_ok","best_identity")
#'
#' @seealso \code{\linkS4class{taxonomy.obitools}}, and method \code{\link{taxonmicank}}
#'
#' @author Lucie Zinger
#' @keywords taxonomy
#'
#' @export
#'
summary.taxores = function(x,colranks,colscores, thresh=0.7){
#vector encompassing all ranked possible taxonomic levels
taxorank = c("superkingdom", "kingdom", "subkingdom", "superphylum", "phylum", "subphylum", "superclass", "class", "subclass", "infraclass",
"superorder", "order", "suborder", "infraorder", "parvorder", "superfamily", "family", "subfamily", "supertribe", "tribe",
"subtribe", "supergenus", "genus", "subgenus", "species group", "species subgroup", "superspecies", "species", "subspecies",
"varietas", "forma", "no rank", "not assigned")
#settings if thresh
ranks = as.vector(x$motus[,colranks])
ranks[x$motus[,colscores]<thresh] = "not assigned"
#nb of otus
tmp = table(ranks)
taxores.otu = tmp[match(taxorank, names(tmp))]
names(taxores.otu) = taxorank
taxores.otu[is.na(taxores.otu)] = 0
#nb of reads
tmp = aggregate(x$motus$count, by=list(ranks), sum)
taxores.reads = tmp[match(taxorank,tmp[,1]),2]
names(taxores.reads) = taxorank
taxores.reads[is.na(taxores.reads)] = 0
#plot
layout(matrix(c(1,2,1,3),2,2),heights=c(0.3,1))
col.tmp = c(rainbow(length(taxorank)-2,start=0, end=0.5, alpha=0.6), "lightgrey", "darkgrey")
par(mar=c(1,0,0,0), oma=c(0,0,2,0))
frame()
legend("top", taxorank, ncol=6, cex=0.8, fill=col.tmp)
pie(taxores.otu, col=col.tmp, border="lightgrey", labels="", clockwise=T)
mtext("OTUs", side=1, cex=1)
pie(taxores.reads, col=col.tmp, border="lightgrey", labels="", clockwise=T)
mtext("Reads", side=1, cex=1)
#result
out = data.frame(otu=taxores.otu, reads=taxores.reads)
out
}

View File

@@ -0,0 +1,53 @@
#' @import ROBITaxonomy
#' @include 02_class_metabarcoding.data.R
NULL
#' Get classical taxonomy format
#'
#' Creates a table with the classical taxonomic description (from phylum to species)
#'
#' @param x a \code{\link{metabarcoding.data}} object
#' @param taxonomy a instance of \code{\linkS4class{taxonomy.obitools}}
#' @param coltaxid a the name of the column containing taxids to be used for creating classical taxonomic description
#'
#' @return returns a data.frame with the classical taxonomic description ("kingdom", "phylum", "class", "order", "family", "genus", "species"), as well as
#' sequence taxonomic assignment rank and scientific name for each sequences stored in the \code{\link{metabarcoding.data}} object
#'
#' @examples
#'
#' data(termes)
#'
#' taxo=default.taxonomy()
#'
#' termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid")
#' head(termes.taxo.table)
#'
#' attr(termes, "motus") = data.frame(termes$motus, termes.taxo.table)
#'
#'
#' @seealso \code{\linkS4class{taxonomy.obitools}}, and methods \code{\link{species}},\code{\link{genus}}, \code{\link{family}},\code{\link{kingdom}},
#' \code{\link{superkingdom}},\code{\link{taxonatrank}}, \code{\link{taxonmicank}}
#'
#' @author Lucie Zinger
#' @keywords taxonomy
#' @export
#'
get.classic.taxonomy = function(x, taxonomy, coltaxid) {
classic.taxo = c("kingdom", "phylum", "class", "order", "family", "genus", "species")
taxids = x$motus[,coltaxid]
out = as.data.frame(do.call("cbind", lapply(classic.taxo, function(y) {
scientificname(taxonomy, taxonatrank(taxonomy,taxids,y))
})))
colnames(out) = paste(classic.taxo, "_name_ok", sep="")
rownames(out) = colnames(x)
out$scientific_name_ok = scientificname(taxonomy, taxids)
out$taxonomic_rank_ok = taxonomicrank(taxonomy, taxids)
return(out)
}