408 lines
12 KiB
R
408 lines
12 KiB
R
#' @include 02_class_metabarcoding.data.R
|
|
#' @import igraph
|
|
NULL
|
|
|
|
require(igraph)
|
|
|
|
# pos = expand.grid(x,y)
|
|
|
|
#' Computes the pairwise distance matrix as a data.frame where
|
|
#'
|
|
#' @param x a vector for the X coordinates
|
|
#' @param y a vector for the Y coordinates
|
|
#' @param labels a vector with the sample names
|
|
#'
|
|
#' @return a data.frame instance of three columns
|
|
#' - a : The label of the first sample
|
|
#' - b : The label of the second sample
|
|
#' - dist : The euclidian distance beween sample a and b
|
|
#'
|
|
#' @examples
|
|
#' data(termes)
|
|
#' termes.ok = termes[,colSums(termes$reads)>0]
|
|
#' pos = expand.grid(1:3 * 10,1:7 * 10)
|
|
#' labels = rownames(termes.ok)
|
|
#' d = dist.grid(pos[,1],pos[2],labels)
|
|
#'
|
|
#' @export
|
|
dist.grid = function(x,y,labels=NULL){
|
|
pos = data.frame(x,y)
|
|
|
|
if (is.null(labels))
|
|
labels = as.character(interaction(pos))
|
|
else
|
|
labels = as.character(labels)
|
|
|
|
llabels=length(labels)
|
|
dpos=dist(pos)
|
|
|
|
a = rep(labels[1:(llabels-1)],(llabels-1):1)
|
|
b = do.call(c,(lapply(2:llabels, function(i) labels[i:llabels])))
|
|
|
|
return(data.frame(a,b,dist=as.vector(dpos)))
|
|
}
|
|
|
|
#' Builds the list of sample groups included in a circle around a central sample
|
|
#'
|
|
#' @param dtable a distance table between samples as
|
|
#' computed by \code{\link{dist.grid}}
|
|
#' @param radius the radius of the circle
|
|
#' @param center a \code{logical} value indicating if the center of
|
|
#' the group must be included in the group
|
|
#'
|
|
#' @return a list of vectors containing the labels of the group members
|
|
#'
|
|
#' @examples
|
|
#' data(termes)
|
|
#' termes.ok = termes[,colSums(termes$reads)>0]
|
|
#' pos = expand.grid(1:3 * 10,1:7 * 10)
|
|
#' labels = rownames(termes.ok)
|
|
#' d = dist.grid(pos[,1],pos[2],labels)
|
|
#' groups = dist.center.group(d,20)
|
|
#'
|
|
#' @export
|
|
dist.center.group=function(dtable,radius,center=TRUE) {
|
|
|
|
fgroup = function(c) {
|
|
ig = dtable[(dtable[,1]==c | dtable[,2]==c) & dtable[,3] <= radius,]
|
|
return(union(ig[,1],ig[,2]))
|
|
}
|
|
|
|
pos = as.character(union(dtable[,1],dtable[,2]))
|
|
|
|
g = lapply(pos,fgroup)
|
|
names(g) = pos
|
|
|
|
if (!center)
|
|
g = mapply(setdiff,g,pos)
|
|
|
|
return(g)
|
|
|
|
}
|
|
|
|
#' Builds the list of sample groups including samples closest than a define distance
|
|
#'
|
|
#' A graph is build by applying the threshold \code{dmax} to the distance matrix
|
|
#' A group is a clique max in this graph. Consequently all member pairs of a group
|
|
#' are distant by less or equal to \code{dmax}.
|
|
#'
|
|
#' @param dtable a distance table between samples as
|
|
#' computed by \code{\link{dist.grid}}
|
|
#' @param dmax the maximum distance between two samples
|
|
#'
|
|
#' @return a list of vectors containing the labels of the group members
|
|
#'
|
|
#' @examples
|
|
#' data(termes)
|
|
#' termes.ok = termes[,colSums(termes$reads)>0]
|
|
#' pos = expand.grid(1:3 * 10,1:7 * 10)
|
|
#' labels = rownames(termes.ok)
|
|
#' d = dist.grid(pos[,1],pos[2],labels)
|
|
#' groups = dist.clique.group(d,20)
|
|
#'
|
|
#' @export
|
|
dist.clique.group=function(dtable,dmax,center=True) {
|
|
gp = igraph::graph.edgelist(as.matrix(dtable[dtable$dist <= dmax,c('a','b')]),directed=FALSE)
|
|
g = igraph::maximal.cliques(gp)
|
|
return(lapply(g, function(i) igraph::V(gp)$name[i]))
|
|
}
|
|
|
|
#' Computes the univariate M statistics
|
|
#'
|
|
#' @param w the weigth matrix indicating the presence probability of each motu
|
|
#' in each samples. Each line corresponds to a sample and each column
|
|
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
|
|
#' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
|
|
#'
|
|
#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
|
|
#' function
|
|
#'
|
|
#' @seealso \code{\link{dist.center.group}}
|
|
#' @seealso \code{\link{m.weight}}
|
|
#'
|
|
#' @examples
|
|
#' data(termes)
|
|
#' termes.ok = termes[,colSums(termes$reads)>0]
|
|
#' pos = expand.grid(1:3 * 10,1:7 * 10)
|
|
#' labels = rownames(termes.ok)
|
|
#' d = dist.grid(pos[,1],pos[2],labels)
|
|
#' groups = dist.center.group(d,20)
|
|
#' w = m.weight(termes.ok)
|
|
#' m = m.univariate(w,groups)
|
|
#'
|
|
#' @references Marcon, E., Puech, F., and Traissac, S. (2012).
|
|
#' Characterizing the relative spatial structure of point patterns.
|
|
#' International Journal of Ecology, 2012.
|
|
#'
|
|
#' @export
|
|
m.univariate = function(w,groups) {
|
|
|
|
nunivar = function(members,center) {
|
|
g = w[members,]
|
|
|
|
wn = colSums(g)
|
|
wa = sum(wn)
|
|
|
|
wn = wn - center
|
|
wa = wa - center
|
|
|
|
p = wn / wa * center
|
|
|
|
return(p)
|
|
}
|
|
|
|
centers = lapply(names(groups),function(x) w[x,])
|
|
|
|
Wf = colSums(w)
|
|
Wa = sum(Wf)
|
|
|
|
Denom.univar = colSums(w * (sweep(-w,2,Wf,'+') / (Wa - w)))
|
|
Num.univar = rowSums(mapply(nunivar,groups,centers))
|
|
|
|
Munivar=Num.univar/Denom.univar
|
|
Munivar[Denom.univar==0]=0
|
|
|
|
return(Munivar)
|
|
}
|
|
|
|
|
|
#' Computes the bivariate M statistics
|
|
#'
|
|
#' The function computes the bivariate M statiscics for a set of target species around a set of
|
|
#' focus species.
|
|
#'
|
|
#' @param w1 the weigth matrix indicating the presence probability of each motu
|
|
#' used as focus species in each samples. Each line corresponds to a sample and each column
|
|
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
|
|
#' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
|
|
#'
|
|
#' @param w2 the weigth matrix indicating the presence probability of each motu
|
|
#' used as target species in each samples. Each line corresponds to a sample and each column
|
|
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
|
|
#' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
|
|
#' if \code{w2} is not set, w1 is also used as target species. in this case the diagonal
|
|
#' of the matrix return contains the univariate M statistic for the diferent species.
|
|
#'
|
|
#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
|
|
#' function
|
|
#'
|
|
#' @return a matrix of M bivariate statistics with one focus species by row and one target species
|
|
#' by columns If \code{w2} is not specified the diagonal of the matrix is equal to the univariate
|
|
#' M statistic of the corresponding species.
|
|
#'
|
|
#' @seealso \code{\link{dist.center.group}}
|
|
#' @seealso \code{\link{m.weight}}
|
|
#'
|
|
#' @examples
|
|
#' data(termes)
|
|
#' termes.ok = termes[,colSums(termes$reads)>0]
|
|
#' pos = expand.grid(1:3 * 10,1:7 * 10)
|
|
#' labels = rownames(termes.ok)
|
|
#' d = dist.grid(pos[,1],pos[2],labels)
|
|
#' groups = dist.center.group(d,20)
|
|
#' w = m.weight(termes.ok)
|
|
#' m = m.bivariate(w,groups)
|
|
#'
|
|
#' @references Marcon, E., Puech, F., and Traissac, S. (2012).
|
|
#' Characterizing the relative spatial structure of point patterns.
|
|
#' International Journal of Ecology, 2012.
|
|
#'
|
|
#' @export
|
|
m.bivariate = function(w1,w2=NULL,groups) {
|
|
|
|
nunbivar = function(members,center) {
|
|
g = w2[members,]
|
|
|
|
wn = colSums(g)
|
|
wa = sum(wn)
|
|
|
|
if (self){
|
|
mwn = wn %*% t(rep(1,length(wn)))
|
|
diag(mwn)= wn - center
|
|
wa = wa - center
|
|
wna = mwn/wa
|
|
p = sweep(wna,2,center,'*')
|
|
#p = center %*% wna
|
|
}
|
|
else {
|
|
wna= matrix(wn/wa,nrow=1)
|
|
p = center %*% wna
|
|
}
|
|
|
|
return(p)
|
|
}
|
|
|
|
if (is.null(w2)){
|
|
self = TRUE
|
|
w2=w1
|
|
}
|
|
else {
|
|
self = FALSE
|
|
}
|
|
|
|
centers = lapply(names(groups),function(x) w[x,])
|
|
|
|
Wf = colSums(w1)
|
|
Wn = colSums(w2)
|
|
Wa = sum(Wn)
|
|
|
|
if (self){
|
|
Wn = sweep(-w1,2,Wn,'+')
|
|
Wna = Wn/(Wa - w1)
|
|
Denom.bivar = t(w1) %*% Wna
|
|
}
|
|
else {
|
|
Wna= t(Wn/Wa)
|
|
Denom.bivar = Wf %*% Wna
|
|
}
|
|
|
|
Num.bivar = matrix(0,nrow=ncol(w1),ncol=ncol(w2))
|
|
|
|
ng = length(groups)
|
|
|
|
for (i in 1:ng) {
|
|
Num.bivar = Num.bivar + nunbivar(groups[[i]],centers[[i]])
|
|
}
|
|
|
|
Mbivar=Num.bivar/Denom.bivar
|
|
|
|
Mbivar[Denom.bivar==0]=0
|
|
|
|
return(Mbivar)
|
|
}
|
|
|
|
#' Computes a weigth matrix from a \code{\linkS4class{metabarcoding.data}}
|
|
#'
|
|
#' The weight can be considered as a propability of presence of a MOTU in a
|
|
#' given sample. This function defines this probability as the fraction of
|
|
#' the maximal occurrence frequency over all samples.
|
|
#'
|
|
#' @param data a \code{\linkS4class{metabarcoding.data}} instance
|
|
#'
|
|
#' @return a weight matrix usable for M statistics
|
|
#'
|
|
#' @examples
|
|
#' data(termes)
|
|
#' termes.ok = termes[,colSums(termes$reads)>0]
|
|
#' w = m.weight(termes.ok)
|
|
#'
|
|
#' @export
|
|
m.weight = function(data) {
|
|
ndata = normalize(data,MARGIN='sample')
|
|
fmax=apply(ndata$reads,2,max)
|
|
w = sweep(ndata$reads,2,fmax,'/')
|
|
rownames(w)=rownames(ndata)
|
|
colnames(w)=colnames(ndata)
|
|
return(w)
|
|
}
|
|
|
|
#' Simulate null distribion of the M statistics by Monte-Carlo
|
|
#'
|
|
#' Computes the null empirical distribution of the M statistics
|
|
#' by shuffling MOTUs among location.
|
|
#'
|
|
#' @param w the weigth matrix indicating the presence probability of each motu
|
|
#' in each samples. Each line corresponds to a sample and each column
|
|
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
|
|
#' names.
|
|
#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
|
|
#' function
|
|
#' @param resampling the number of simulation to establish the null distribution
|
|
#'
|
|
#' @return a matrix of M score under the null hypothesis of random distribution of MOTUs
|
|
#' with a MOTUs per line and a culumn per simulation
|
|
#'
|
|
#' @examples
|
|
#' data(termes)
|
|
#' termes.ok = termes[,colSums(termes$reads)>0]
|
|
#' pos = expand.grid(1:3 * 10,1:7 * 10)
|
|
#' labels = rownames(termes.ok)
|
|
#' d = dist.grid(pos[,1],pos[2],labels)
|
|
#' groups = dist.center.group(d,20)
|
|
#' w = m.weight(termes.ok)
|
|
#' dnull = dm.univariate(w,groups)
|
|
#'
|
|
#' @export
|
|
dm.univariate = function(w,groups,resampling=100) {
|
|
|
|
shuffle = function(w){
|
|
wr =apply(w,2,function(y) sample(y,length(y),replace=FALSE))
|
|
rownames(wr)=rownames(w)
|
|
return(wr)
|
|
}
|
|
|
|
msim = function(x) {
|
|
return(m.univariate(shuffle(w),groups))
|
|
}
|
|
|
|
dnull = mapply(msim,1:resampling)
|
|
|
|
rownames(dnull) = colnames(w)
|
|
|
|
return(dnull)
|
|
}
|
|
|
|
#' Test the significance of the M statistics by Monte-Carlo
|
|
#'
|
|
#' Computes computes the p.value the M statistics asociated to a MOTU
|
|
#' by shuffling MOTUs among location.
|
|
#'
|
|
#' @param w the weigth matrix indicating the presence probability of each motu
|
|
#' in each samples. Each line corresponds to a sample and each column
|
|
#' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
|
|
#' names.
|
|
#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
|
|
#' function
|
|
#' @param resampling the number of simulation to establish the null distribution
|
|
#'
|
|
#' @param alternative a character value in \code{c('two.sided','less','greater')}
|
|
#' - two.sided : the m stat is check against the two side of the empirical
|
|
#' M distribution
|
|
#' - less : test if the M stat is lesser than the M observed in the the empirical
|
|
#' M distribution (exlusion hypothesis)
|
|
#' - greater : test if the M stat is greater than the M observed in the the empirical
|
|
#' M distribution (aggregation hypothesis)
|
|
#'
|
|
#' @return a vector of p.value with an attribute \code{m.stat} containing the actual M stat
|
|
#' for each MOTUs
|
|
#'
|
|
#' @examples
|
|
#' data(termes)
|
|
#' termes.ok = termes[,colSums(termes$reads)>0]
|
|
#' pos = expand.grid(1:3 * 10,1:7 * 10)
|
|
#' labels = rownames(termes.ok)
|
|
#' d = dist.grid(pos[,1],pos[2],labels)
|
|
#' groups = dist.center.group(d,20)
|
|
#' w = m.weight(termes.ok)
|
|
#' pval = m.univariate.test(w,groups)
|
|
#'
|
|
#' @export
|
|
m.univariate.test = function(w,groups,resampling=100,alternative='two.sided') {
|
|
dnull = dm.univariate(w,groups,resampling)
|
|
m = m.univariate(w,groups)
|
|
pnull = sapply(1:dim(dnull)[1],function(y) 1 - ecdf(dnull[y,])(m[y]))
|
|
|
|
p.value=NULL
|
|
|
|
if (alternative=='two.sided') {
|
|
p.value = mapply(min,pnull,1 - pnull)
|
|
}
|
|
|
|
if (alternative=='less') {
|
|
p.value = pnull
|
|
}
|
|
|
|
if (alternative=='greater') {
|
|
p.value = 1 - pnull
|
|
}
|
|
|
|
# Set p.value to 1 if the MOTU occurres in only one place
|
|
n = colSums(w > 0)
|
|
p.value[n==1]=1
|
|
|
|
names(p.value) = colnames(w)
|
|
attr(p.value,'m.stat')=m
|
|
|
|
return(p.value)
|
|
}
|