Files
biodiversity-metrics/R/select_pcr.R

64 lines
1.6 KiB
R

#' @export
mode <- function(x) {
d <- density(x)
d$x[which.max(d$y)]
}
#' @export
tag_bad_pcr = function(samples,counts,plot = TRUE) {
counts = decostand(counts,method = "hellinger")
bc = aggregate(counts,
by=list(factor(as.character(samples))),
mean)
bc.name = as.character(bc[,1])
bc = bc[-1]
rownames(bc)=bc.name
bc = bc[as.character(samples),]
d = sqrt(rowSums((counts - bc)^2))
names(d) = as.character(samples)
d.m = mode(d)
d.sd = sqrt(sum((d[d <= d.m] - d.m)^2)/sum(d <= d.m))
d.max = aggregate(d,
by = list(factor(as.character(samples))),
max)
d.max.names = d.max[,1]
d.max = d.max[,2]
names(d.max) = d.max.names
d.max = d.max[as.character(samples)]
d.len = aggregate(d,
by = list(factor(as.character(samples))),
length)
d.len.names = d.len[,1]
d.len = d.len[,2]
names(d.len) = d.len.names
d.len = d.len[as.character(samples)]
keep = ((d < d.m + (d.sd*2)) | d!=d.max) & d.len > 1
selection = data.frame(samples = as.character(samples),
distance= d,
maximum = d.max,
repeats = d.len,
keep = keep,
stringsAsFactors = FALSE)
rownames(selection)=rownames(counts)
attributes(selection)$dist.mode = d.m
attributes(selection)$dist.sd = d.sd
if (plot) {
hist(d)
abline(v=d.m,lty=2,col="green")
abline(v=d.m + (d.sd*2),lty=2,col="red")
}
return(selection)
}