Change the package path

2018-02-20 06:40:29 +11:00
parent 0450ebf427
commit 51f152cca4
48 changed files with 0 additions and 3 deletions
--- a/R/mstat.R
+++ b/R/mstat.R
@@ -0,0 +1,407 @@
+#' @include 02_class_metabarcoding.data.R
+#' @import igraph
+NULL
+
+require(igraph)
+
+# pos = expand.grid(x,y)
+
+#' Computes the pairwise distance matrix as a data.frame where
+#' 
+#' @param x a vector for the X coordinates
+#' @param y a vector for the Y coordinates
+#' @param labels a vector with the sample names
+#' 
+#' @return a data.frame instance of three columns
+#'    - a : The label of the first sample
+#'    - b : The label of the second sample
+#'    - dist : The euclidian distance beween sample a and b
+#' 
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' 
+#' @export
+dist.grid = function(x,y,labels=NULL){
+  pos = data.frame(x,y)
+  
+  if (is.null(labels))
+    labels = as.character(interaction(pos))
+  else
+    labels = as.character(labels)
+
+  llabels=length(labels)
+  dpos=dist(pos)
+
+  a = rep(labels[1:(llabels-1)],(llabels-1):1)
+  b = do.call(c,(lapply(2:llabels, function(i) labels[i:llabels])))
+
+  return(data.frame(a,b,dist=as.vector(dpos)))
+}
+
+#' Builds the list of sample groups included in a circle around a central sample
+#' 
+#' @param dtable a distance table between samples as 
+#'               computed by \code{\link{dist.grid}}
+#' @param radius the radius of the circle
+#' @param center a \code{logical} value indicating if the center of
+#'               the group must be included in the group
+#'               
+#' @return a list of vectors containing the labels of the group members 
+#'          
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' 
+#' @export
+dist.center.group=function(dtable,radius,center=TRUE) {
+  
+  fgroup = function(c) {
+    ig = dtable[(dtable[,1]==c | dtable[,2]==c) & dtable[,3] <= radius,]
+    return(union(ig[,1],ig[,2]))
+  }
+  
+  pos = as.character(union(dtable[,1],dtable[,2]))
+  
+  g = lapply(pos,fgroup)
+  names(g) = pos
+  
+  if (!center)
+    g = mapply(setdiff,g,pos)
+  
+  return(g)
+  
+}
+
+#' Builds the list of sample groups including samples closest than a define distance
+#' 
+#' A graph is build by applying the threshold \code{dmax} to the distance matrix
+#' A group is a clique max in this graph. Consequently all member pairs of a group
+#' are distant by less or equal to \code{dmax}.
+#' 
+#' @param dtable a distance table between samples as 
+#'               computed by \code{\link{dist.grid}}
+#' @param dmax the maximum distance between two samples
+#'               
+#' @return a list of vectors containing the labels of the group members 
+#'          
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.clique.group(d,20)
+#' 
+#' @export
+dist.clique.group=function(dtable,dmax,center=True) {
+  gp = igraph::graph.edgelist(as.matrix(dtable[dtable$dist <= dmax,c('a','b')]),directed=FALSE)
+  g  = igraph::maximal.cliques(gp)
+  return(lapply(g, function(i) igraph::V(gp)$name[i]))
+}
+
+#' Computes the univariate M statistics 
+#' 
+#' @param w the weigth matrix indicating the presence probability of each motu
+#'          in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names.  It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
+#'          
+#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
+#'               function
+#'
+#' @seealso \code{\link{dist.center.group}}
+#' @seealso \code{\link{m.weight}}
+#' 
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' w = m.weight(termes.ok)
+#' m = m.univariate(w,groups)
+#' 
+#' @references Marcon, E., Puech, F., and Traissac, S. (2012). 
+#'             Characterizing the relative spatial structure of point patterns. 
+#'             International Journal of Ecology, 2012.
+#'             
+#' @export
+m.univariate = function(w,groups) {
+  
+  nunivar = function(members,center) {
+    g = w[members,]
+    
+    wn = colSums(g)
+    wa = sum(wn)
+ 
+    wn = wn - center
+    wa = wa - center
+    
+    p = wn / wa * center
+        
+    return(p)
+  }
+
+  centers = lapply(names(groups),function(x) w[x,])
+  
+  Wf = colSums(w)
+  Wa = sum(Wf)
+
+  Denom.univar = colSums(w * (sweep(-w,2,Wf,'+') / (Wa - w)))
+  Num.univar   = rowSums(mapply(nunivar,groups,centers))
+  
+  Munivar=Num.univar/Denom.univar
+  Munivar[Denom.univar==0]=0
+  
+  return(Munivar)
+}
+
+
+#' Computes the bivariate M statistics 
+#' 
+#' The function computes the bivariate M statiscics for a set of target species around a set of
+#' focus species.
+#' 
+#' @param w1 the weigth matrix indicating the presence probability of each motu
+#'          used as focus species in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
+#'          
+#' @param w2 the weigth matrix indicating the presence probability of each motu
+#'          used as target species in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
+#'          if \code{w2} is not set, w1 is also used as target species. in this case the diagonal
+#'          of the matrix return contains the univariate M statistic for the diferent species.
+#'          
+#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
+#'               function
+#'               
+#' @return a matrix of M bivariate statistics with one focus species by row and one target species 
+#'         by columns If \code{w2} is not specified the diagonal of the matrix is equal to the univariate
+#'         M statistic of the corresponding species.
+#'
+#' @seealso \code{\link{dist.center.group}}
+#' @seealso \code{\link{m.weight}}
+#' 
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' w = m.weight(termes.ok)
+#' m = m.bivariate(w,groups)
+#' 
+#' @references Marcon, E., Puech, F., and Traissac, S. (2012). 
+#'             Characterizing the relative spatial structure of point patterns. 
+#'             International Journal of Ecology, 2012.
+#'             
+#' @export
+m.bivariate = function(w1,w2=NULL,groups) {
+  
+  nunbivar = function(members,center) {
+    g = w2[members,]
+    
+    wn = colSums(g)
+    wa = sum(wn)
+    
+    if (self){
+      mwn  = wn %*% t(rep(1,length(wn)))
+      diag(mwn)= wn - center
+      wa = wa - center
+      wna = mwn/wa
+      p = sweep(wna,2,center,'*')
+      #p = center %*% wna
+    }
+    else {
+      wna= matrix(wn/wa,nrow=1)
+      p = center %*% wna
+    }    
+    
+    return(p)
+  }
+  
+  if (is.null(w2)){
+    self = TRUE
+    w2=w1
+  }
+  else {
+    self = FALSE
+  }
+  
+  centers = lapply(names(groups),function(x) w[x,])
+
+  Wf = colSums(w1)
+  Wn = colSums(w2)
+  Wa = sum(Wn)
+  
+  if (self){      
+    Wn = sweep(-w1,2,Wn,'+')
+    Wna = Wn/(Wa - w1)
+    Denom.bivar = t(w1) %*% Wna
+  }
+  else {
+    Wna= t(Wn/Wa)
+    Denom.bivar = Wf %*% Wna    
+  }
+  
+  Num.bivar = matrix(0,nrow=ncol(w1),ncol=ncol(w2))
+
+  ng = length(groups)
+  
+  for (i in 1:ng) {
+    Num.bivar = Num.bivar + nunbivar(groups[[i]],centers[[i]])
+  }
+  
+  Mbivar=Num.bivar/Denom.bivar
+
+  Mbivar[Denom.bivar==0]=0
+  
+  return(Mbivar)
+}
+
+#' Computes a weigth matrix from a \code{\linkS4class{metabarcoding.data}}
+#' 
+#' The weight can be considered as a propability of presence of a MOTU in a 
+#' given sample. This function defines this probability as the fraction of
+#' the maximal occurrence frequency over all samples. 
+#' 
+#' @param data a \code{\linkS4class{metabarcoding.data}} instance
+#' 
+#' @return a weight matrix usable for M statistics
+#' 
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' w = m.weight(termes.ok)
+#' 
+#' @export
+m.weight = function(data) {
+  ndata = normalize(data,MARGIN='sample')
+  fmax=apply(ndata$reads,2,max)
+  w = sweep(ndata$reads,2,fmax,'/')
+  rownames(w)=rownames(ndata)
+  colnames(w)=colnames(ndata)
+  return(w)
+}
+
+#' Simulate null distribion of the M statistics by Monte-Carlo
+#' 
+#' Computes the null empirical distribution of the M statistics
+#' by shuffling MOTUs among location.
+#' 
+#' @param w the weigth matrix indicating the presence probability of each motu
+#'          in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names.
+#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
+#'               function
+#' @param resampling the number of simulation to establish the null distribution
+#' 
+#' @return a matrix of M score under the null hypothesis of random distribution of MOTUs
+#'         with a MOTUs per line and a culumn per simulation
+#'
+#' @examples       
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' w = m.weight(termes.ok)
+#' dnull = dm.univariate(w,groups)
+#' 
+#' @export              
+dm.univariate = function(w,groups,resampling=100) {
+  
+  shuffle = function(w){
+    wr =apply(w,2,function(y) sample(y,length(y),replace=FALSE))
+    rownames(wr)=rownames(w)
+    return(wr)
+  }
+  
+  msim = function(x) {
+    return(m.univariate(shuffle(w),groups))
+  }
+  
+  dnull = mapply(msim,1:resampling)
+  
+  rownames(dnull) = colnames(w)
+  
+  return(dnull)
+}
+
+#' Test the significance of the M statistics by Monte-Carlo
+#' 
+#' Computes computes the p.value the M statistics asociated to a MOTU
+#' by shuffling MOTUs among location.
+#' 
+#' @param w the weigth matrix indicating the presence probability of each motu
+#'          in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names.
+#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
+#'               function
+#' @param resampling the number of simulation to establish the null distribution
+#' 
+#' @param alternative a character value in \code{c('two.sided','less','greater')}
+#'               - two.sided : the m stat is check against the two side of the empirical
+#'                             M distribution
+#'               - less : test if the M stat is lesser than the M observed in the the empirical
+#'                             M distribution (exlusion hypothesis)
+#'               - greater : test if the M stat is greater than the M observed in the the empirical
+#'                             M distribution (aggregation hypothesis)
+#' 
+#' @return a vector of p.value with an attribute \code{m.stat} containing the actual M stat
+#'         for each MOTUs
+#'
+#' @examples       
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' w = m.weight(termes.ok)
+#' pval = m.univariate.test(w,groups)
+#' 
+#' @export              
+m.univariate.test = function(w,groups,resampling=100,alternative='two.sided') {
+  dnull = dm.univariate(w,groups,resampling)
+  m = m.univariate(w,groups)
+  pnull = sapply(1:dim(dnull)[1],function(y) 1 - ecdf(dnull[y,])(m[y]))
+  
+  p.value=NULL
+  
+  if (alternative=='two.sided') {
+    p.value = mapply(min,pnull,1 - pnull)
+  }
+  
+  if (alternative=='less') {
+    p.value = pnull
+  }
+
+  if (alternative=='greater') {
+    p.value = 1 - pnull
+  }
+  
+  # Set p.value to 1 if the MOTU occurres in only one place
+  n = colSums(w > 0)
+  p.value[n==1]=1
+  
+  names(p.value) = colnames(w)
+  attr(p.value,'m.stat')=m
+  
+  return(p.value)
+}