Files
ROBITools/R/contaslayer.R
2018-02-20 06:40:29 +11:00

50 lines
1.6 KiB
R

#' @include 02_class_metabarcoding.data.R
NULL
#' Detects contaminants in metabarcoding data
#'
#' Detects sequences/motus in a \code{\link{metabarcoding.data}} object
#' for which frequencies over the entire dataset are maximum in negative controls and
#' hence, most likely to be contaminants.
#'
#'
#' @param x a \code{\link{metabarcoding.data}} object
#' @param controls a vector of samples names where conta are suspected to be detected
#' (typically negative control names).
#' @param clust a vector for grouping sequences. Default set to \code{NULL}.
#'
#' @return a vector containing the names of sequences identified as contaminants
#'
#' @examples
#'
#' data(termes)
#' termes.ok = termes[,colSums(termes$reads)>0]
#' neg = rownames(termes.ok)[grep("r",rownames(termes.ok))]
#'
#' #finds contaminants based on neg samples
#' contaslayer(termes.ok, neg)
#'
#' # extanding contamininant detection with grouping factor,
#' # typically obiclean/sumatra cluster or taxonomy membership
#' contaslayer(termes.ok, neg, termes.ok$motus$scientific_name)
#'
#' @seealso \code{\link{threshold}} for further trimming
#' @author Lucie Zinger
#' @export
contaslayer = function(x,controls,clust=NULL){
x.fcol = normalize(x, MARGIN=2)$reads
x.max = rownames(x.fcol[apply(x.fcol, 2, which.max),])
conta = colnames(x)[!is.na(match(x.max,controls))]
if (length(clust)!=0) {
agg = data.frame(conta.id=colnames(x.fcol), clust)
conta.ext = agg$conta.id[which(!is.na(match( agg$clust, agg$clust[match(conta,agg$conta.id)])))]
return(as.vector(conta.ext))
}
else {
return(conta)
}
}