#' @export mode <- function(x) { d <- density(x) d$x[which.max(d$y)] } #' @export tag_bad_pcr = function(samples,counts,plot = TRUE) { counts = decostand(counts,method = "hellinger") bc = aggregate(counts, by=list(factor(as.character(samples))), mean) bc.name = as.character(bc[,1]) bc = bc[-1] rownames(bc)=bc.name bc = bc[as.character(samples),] d = sqrt(rowSums((counts - bc)^2)) names(d) = as.character(samples) d.m = mode(d) d.sd = sqrt(sum((d[d <= d.m] - d.m)^2)/sum(d <= d.m)) d.max = aggregate(d, by = list(factor(as.character(samples))), max) d.max.names = d.max[,1] d.max = d.max[,2] names(d.max) = d.max.names d.max = d.max[as.character(samples)] d.len = aggregate(d, by = list(factor(as.character(samples))), length) d.len.names = d.len[,1] d.len = d.len[,2] names(d.len) = d.len.names d.len = d.len[as.character(samples)] keep = ((d < d.m + (d.sd*2)) | d!=d.max) & d.len > 1 selection = data.frame(samples = as.character(samples), distance= d, maximum = d.max, repeats = d.len, keep = keep, stringsAsFactors = FALSE) rownames(selection)=rownames(counts) attributes(selection)$dist.mode = d.m attributes(selection)$dist.sd = d.sd if (plot) { hist(d) abline(v=d.m,lty=2,col="green") abline(v=d.m + (d.sd*2),lty=2,col="red") } return(selection) }