#' @include 02_class_metabarcoding.data.R #' @import igraph NULL require(igraph) # pos = expand.grid(x,y) #' Computes the pairwise distance matrix as a data.frame where #' #' @param x a vector for the X coordinates #' @param y a vector for the Y coordinates #' @param labels a vector with the sample names #' #' @return a data.frame instance of three columns #' - a : The label of the first sample #' - b : The label of the second sample #' - dist : The euclidian distance beween sample a and b #' #' @examples #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' pos = expand.grid(1:3 * 10,1:7 * 10) #' labels = rownames(termes.ok) #' d = dist.grid(pos[,1],pos[2],labels) #' #' @export dist.grid = function(x,y,labels=NULL){ pos = data.frame(x,y) if (is.null(labels)) labels = as.character(interaction(pos)) else labels = as.character(labels) llabels=length(labels) dpos=dist(pos) a = rep(labels[1:(llabels-1)],(llabels-1):1) b = do.call(c,(lapply(2:llabels, function(i) labels[i:llabels]))) return(data.frame(a,b,dist=as.vector(dpos))) } #' Builds the list of sample groups included in a circle around a central sample #' #' @param dtable a distance table between samples as #' computed by \code{\link{dist.grid}} #' @param radius the radius of the circle #' @param center a \code{logical} value indicating if the center of #' the group must be included in the group #' #' @return a list of vectors containing the labels of the group members #' #' @examples #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' pos = expand.grid(1:3 * 10,1:7 * 10) #' labels = rownames(termes.ok) #' d = dist.grid(pos[,1],pos[2],labels) #' groups = dist.center.group(d,20) #' #' @export dist.center.group=function(dtable,radius,center=TRUE) { fgroup = function(c) { ig = dtable[(dtable[,1]==c | dtable[,2]==c) & dtable[,3] <= radius,] return(union(ig[,1],ig[,2])) } pos = as.character(union(dtable[,1],dtable[,2])) g = lapply(pos,fgroup) names(g) = pos if (!center) g = mapply(setdiff,g,pos) return(g) } #' Builds the list of sample groups including samples closest than a define distance #' #' A graph is build by applying the threshold \code{dmax} to the distance matrix #' A group is a clique max in this graph. Consequently all member pairs of a group #' are distant by less or equal to \code{dmax}. #' #' @param dtable a distance table between samples as #' computed by \code{\link{dist.grid}} #' @param dmax the maximum distance between two samples #' #' @return a list of vectors containing the labels of the group members #' #' @examples #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' pos = expand.grid(1:3 * 10,1:7 * 10) #' labels = rownames(termes.ok) #' d = dist.grid(pos[,1],pos[2],labels) #' groups = dist.clique.group(d,20) #' #' @export dist.clique.group=function(dtable,dmax,center=True) { gp = igraph::graph.edgelist(as.matrix(dtable[dtable$dist <= dmax,c('a','b')]),directed=FALSE) g = igraph::maximal.cliques(gp) return(lapply(g, function(i) igraph::V(gp)$name[i])) } #' Computes the univariate M statistics #' #' @param w the weigth matrix indicating the presence probability of each motu #' in each samples. Each line corresponds to a sample and each column #' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample #' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id. #' #' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}} #' function #' #' @seealso \code{\link{dist.center.group}} #' @seealso \code{\link{m.weight}} #' #' @examples #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' pos = expand.grid(1:3 * 10,1:7 * 10) #' labels = rownames(termes.ok) #' d = dist.grid(pos[,1],pos[2],labels) #' groups = dist.center.group(d,20) #' w = m.weight(termes.ok) #' m = m.univariate(w,groups) #' #' @references Marcon, E., Puech, F., and Traissac, S. (2012). #' Characterizing the relative spatial structure of point patterns. #' International Journal of Ecology, 2012. #' #' @export m.univariate = function(w,groups) { nunivar = function(members,center) { g = w[members,] wn = colSums(g) wa = sum(wn) wn = wn - center wa = wa - center p = wn / wa * center return(p) } centers = lapply(names(groups),function(x) w[x,]) Wf = colSums(w) Wa = sum(Wf) Denom.univar = colSums(w * (sweep(-w,2,Wf,'+') / (Wa - w))) Num.univar = rowSums(mapply(nunivar,groups,centers)) Munivar=Num.univar/Denom.univar Munivar[Denom.univar==0]=0 return(Munivar) } #' Computes the bivariate M statistics #' #' The function computes the bivariate M statiscics for a set of target species around a set of #' focus species. #' #' @param w1 the weigth matrix indicating the presence probability of each motu #' used as focus species in each samples. Each line corresponds to a sample and each column #' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample #' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id. #' #' @param w2 the weigth matrix indicating the presence probability of each motu #' used as target species in each samples. Each line corresponds to a sample and each column #' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample #' names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id. #' if \code{w2} is not set, w1 is also used as target species. in this case the diagonal #' of the matrix return contains the univariate M statistic for the diferent species. #' #' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}} #' function #' #' @return a matrix of M bivariate statistics with one focus species by row and one target species #' by columns If \code{w2} is not specified the diagonal of the matrix is equal to the univariate #' M statistic of the corresponding species. #' #' @seealso \code{\link{dist.center.group}} #' @seealso \code{\link{m.weight}} #' #' @examples #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' pos = expand.grid(1:3 * 10,1:7 * 10) #' labels = rownames(termes.ok) #' d = dist.grid(pos[,1],pos[2],labels) #' groups = dist.center.group(d,20) #' w = m.weight(termes.ok) #' m = m.bivariate(w,groups) #' #' @references Marcon, E., Puech, F., and Traissac, S. (2012). #' Characterizing the relative spatial structure of point patterns. #' International Journal of Ecology, 2012. #' #' @export m.bivariate = function(w1,w2=NULL,groups) { nunbivar = function(members,center) { g = w2[members,] wn = colSums(g) wa = sum(wn) if (self){ mwn = wn %*% t(rep(1,length(wn))) diag(mwn)= wn - center wa = wa - center wna = mwn/wa p = sweep(wna,2,center,'*') #p = center %*% wna } else { wna= matrix(wn/wa,nrow=1) p = center %*% wna } return(p) } if (is.null(w2)){ self = TRUE w2=w1 } else { self = FALSE } centers = lapply(names(groups),function(x) w[x,]) Wf = colSums(w1) Wn = colSums(w2) Wa = sum(Wn) if (self){ Wn = sweep(-w1,2,Wn,'+') Wna = Wn/(Wa - w1) Denom.bivar = t(w1) %*% Wna } else { Wna= t(Wn/Wa) Denom.bivar = Wf %*% Wna } Num.bivar = matrix(0,nrow=ncol(w1),ncol=ncol(w2)) ng = length(groups) for (i in 1:ng) { Num.bivar = Num.bivar + nunbivar(groups[[i]],centers[[i]]) } Mbivar=Num.bivar/Denom.bivar Mbivar[Denom.bivar==0]=0 return(Mbivar) } #' Computes a weigth matrix from a \code{\linkS4class{metabarcoding.data}} #' #' The weight can be considered as a propability of presence of a MOTU in a #' given sample. This function defines this probability as the fraction of #' the maximal occurrence frequency over all samples. #' #' @param data a \code{\linkS4class{metabarcoding.data}} instance #' #' @return a weight matrix usable for M statistics #' #' @examples #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' w = m.weight(termes.ok) #' #' @export m.weight = function(data) { ndata = normalize(data,MARGIN='sample') fmax=apply(ndata$reads,2,max) w = sweep(ndata$reads,2,fmax,'/') rownames(w)=rownames(ndata) colnames(w)=colnames(ndata) return(w) } #' Simulate null distribion of the M statistics by Monte-Carlo #' #' Computes the null empirical distribution of the M statistics #' by shuffling MOTUs among location. #' #' @param w the weigth matrix indicating the presence probability of each motu #' in each samples. Each line corresponds to a sample and each column #' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample #' names. #' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}} #' function #' @param resampling the number of simulation to establish the null distribution #' #' @return a matrix of M score under the null hypothesis of random distribution of MOTUs #' with a MOTUs per line and a culumn per simulation #' #' @examples #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' pos = expand.grid(1:3 * 10,1:7 * 10) #' labels = rownames(termes.ok) #' d = dist.grid(pos[,1],pos[2],labels) #' groups = dist.center.group(d,20) #' w = m.weight(termes.ok) #' dnull = dm.univariate(w,groups) #' #' @export dm.univariate = function(w,groups,resampling=100) { shuffle = function(w){ wr =apply(w,2,function(y) sample(y,length(y),replace=FALSE)) rownames(wr)=rownames(w) return(wr) } msim = function(x) { return(m.univariate(shuffle(w),groups)) } dnull = mapply(msim,1:resampling) rownames(dnull) = colnames(w) return(dnull) } #' Test the significance of the M statistics by Monte-Carlo #' #' Computes computes the p.value the M statistics asociated to a MOTU #' by shuffling MOTUs among location. #' #' @param w the weigth matrix indicating the presence probability of each motu #' in each samples. Each line corresponds to a sample and each column #' to a MOTU. \code{rownames} of the \code{w} matrix must be the sample #' names. #' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}} #' function #' @param resampling the number of simulation to establish the null distribution #' #' @param alternative a character value in \code{c('two.sided','less','greater')} #' - two.sided : the m stat is check against the two side of the empirical #' M distribution #' - less : test if the M stat is lesser than the M observed in the the empirical #' M distribution (exlusion hypothesis) #' - greater : test if the M stat is greater than the M observed in the the empirical #' M distribution (aggregation hypothesis) #' #' @return a vector of p.value with an attribute \code{m.stat} containing the actual M stat #' for each MOTUs #' #' @examples #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' pos = expand.grid(1:3 * 10,1:7 * 10) #' labels = rownames(termes.ok) #' d = dist.grid(pos[,1],pos[2],labels) #' groups = dist.center.group(d,20) #' w = m.weight(termes.ok) #' pval = m.univariate.test(w,groups) #' #' @export m.univariate.test = function(w,groups,resampling=100,alternative='two.sided') { dnull = dm.univariate(w,groups,resampling) m = m.univariate(w,groups) pnull = sapply(1:dim(dnull)[1],function(y) 1 - ecdf(dnull[y,])(m[y])) p.value=NULL if (alternative=='two.sided') { p.value = mapply(min,pnull,1 - pnull) } if (alternative=='less') { p.value = pnull } if (alternative=='greater') { p.value = 1 - pnull } # Set p.value to 1 if the MOTU occurres in only one place n = colSums(w > 0) p.value[n==1]=1 names(p.value) = colnames(w) attr(p.value,'m.stat')=m return(p.value) }