#' @include 02_class_metabarcoding.data.R NULL #' Detects contaminants in metabarcoding data #' #' Detects sequences/motus in a \code{\link{metabarcoding.data}} object #' for which frequencies over the entire dataset are maximum in negative controls and #' hence, most likely to be contaminants. #' #' #' @param x a \code{\link{metabarcoding.data}} object #' @param controls a vector of samples names where conta are suspected to be detected #' (typically negative control names). #' @param clust a vector for grouping sequences. Default set to \code{NULL}. #' #' @return a vector containing the names of sequences identified as contaminants #' #' @examples #' #' data(termes) #' termes.ok = termes[,colSums(termes$reads)>0] #' neg = rownames(termes.ok)[grep("r",rownames(termes.ok))] #' #' #finds contaminants based on neg samples #' contaslayer(termes.ok, neg) #' #' # extanding contamininant detection with grouping factor, #' # typically obiclean/sumatra cluster or taxonomy membership #' contaslayer(termes.ok, neg, termes.ok$motus$scientific_name) #' #' @seealso \code{\link{threshold}} for further trimming #' @author Lucie Zinger #' @export contaslayer = function(x,controls,clust=NULL){ x.fcol = normalize(x, MARGIN=2)$reads x.max = rownames(x.fcol[apply(x.fcol, 2, which.max),]) conta = colnames(x)[!is.na(match(x.max,controls))] if (length(clust)!=0) { agg = data.frame(conta.id=colnames(x.fcol), clust) conta.ext = agg$conta.id[which(!is.na(match( agg$clust, agg$clust[match(conta,agg$conta.id)])))] return(as.vector(conta.ext)) } else { return(conta) } }