Initial commit of the project

2019-02-06 17:16:08 +01:00
commit 0393da1883
47 changed files with 40413 additions and 0 deletions
--- a/R/entropy.R
+++ b/R/entropy.R
@@ -0,0 +1,23 @@
+#' @author Eric Coissac
+#' @export
+H.q = function(x,q=1) {
+  sum(x * log.q(1/x,q),na.rm = TRUE)
+}
+
+#' @author Eric Coissac
+#' @export
+D.q = function(x,q=1) {
+  exp.q(H.q(x,q),q)
+}
+
+#' @author Eric Coissac
+#' @export
+H.spectrum = function(x,q=1) {
+  sapply(q,function(Q) H.q(x,Q))
+}
+
+#' @author Eric Coissac
+#' @export
+D.spectrum = function(x,q=1) {
+  sapply(q,function(Q) D.q(x,Q))
+}
--- a/R/generalized_log.R
+++ b/R/generalized_log.R
@@ -0,0 +1,35 @@
+#' @importFrom Rdpack reprompt
+#'
+NULL
+
+#' Generalized logaritmic function.
+#'
+#' \deqn{x \longmapsto 1 : \log(x) \approx x-1}
+#'
+#'
+#' @references
+#'    \insertRef{Tsallis:94:00}{ROBITools2}
+#'
+#'
+#' @author Eric Coissac
+#' @export
+log.q = function(x,q=1) {
+  if (q==1)
+    log(x)
+  else (x^(1-q)-1)/(1-q)
+}
+
+#' Generalized exponential function.
+#'
+#'
+#' @references
+#'    \insertRef{Tsallis:94:00}{ROBITools2}
+#'
+#' @author Eric Coissac
+#' @export
+exp.q = function(x,q=1,base=exp(1)) {
+  if (q==1)
+    exp(x)
+  else
+    (1 + (1-q)*x)^(1/(1-q))
+}
--- a/R/select_pcr.R
+++ b/R/select_pcr.R
@@ -0,0 +1,63 @@
+#' @export
+mode <- function(x) {
+  d <- density(x)
+  d$x[which.max(d$y)]
+}
+
+#' @export
+tag_bad_pcr = function(samples,counts,plot = TRUE) {
+  counts = decostand(counts,method = "hellinger")
+  
+  bc = aggregate(counts,
+                 by=list(factor(as.character(samples))),
+                 mean)
+  bc.name = as.character(bc[,1])
+  bc = bc[-1]
+  rownames(bc)=bc.name
+  bc = bc[as.character(samples),]
+ 
+  d = sqrt(rowSums((counts - bc)^2))
+  names(d) = as.character(samples)
+  
+  d.m  = mode(d)
+  d.sd = sqrt(sum((d[d <= d.m] - d.m)^2)/sum(d <= d.m))
+  
+  d.max = aggregate(d,
+                    by = list(factor(as.character(samples))),
+                    max)
+  
+  d.max.names = d.max[,1]
+  d.max = d.max[,2]
+  names(d.max) = d.max.names
+  d.max = d.max[as.character(samples)]
+
+  d.len = aggregate(d,
+                    by = list(factor(as.character(samples))),
+                    length)
+  
+  d.len.names = d.len[,1]
+  d.len = d.len[,2]
+  names(d.len) = d.len.names
+  d.len = d.len[as.character(samples)]
+  
+  keep = ((d < d.m + (d.sd*2)) | d!=d.max) & d.len > 1
+  
+  selection = data.frame(samples = as.character(samples),
+                         distance= d,
+                         maximum = d.max,
+                         repeats = d.len,
+                         keep    = keep,
+                         stringsAsFactors = FALSE)
+  
+  rownames(selection)=rownames(counts)
+  attributes(selection)$dist.mode = d.m
+  attributes(selection)$dist.sd = d.sd
+  
+  if (plot) {
+    hist(d)
+    abline(v=d.m,lty=2,col="green")
+    abline(v=d.m + (d.sd*2),lty=2,col="red")
+  }
+    
+  return(selection)
+}