Files
ROBITools/R/experimental.section.R
2018-02-20 06:40:29 +11:00

207 lines
5.8 KiB
R

#' @include 02_class_metabarcoding.data.R
NULL
#11.03.2011
#L.Zinger
#######################
#function anosim.pw
#######################
#computes pairwise anosim computation
#input:
#dat: dissimilarity matrix
#g: factor defining the grouping to test
#permutations: number of permutations used to assess the anosim statistics
#p.adjust.method: method of correction for multiple-testing
#
#output: a distance-like table containing:
#in the upper triangle: the anosims R values
#in the lower triangle: the adjusted p-values
### start
anosim.pw <- function(dat, g, permutations, p.adjust.method, ...) {
  # Pairwise ANOSIM between all pairs of levels of `g`.
  #
  # dat: dissimilarity matrix (anything coercible with as.matrix/as.dist)
  # g: grouping to test (coerced to factor)
  # permutations: number of permutations passed to vegan::anosim
  # p.adjust.method: multiple-testing correction method (see ?p.adjust)
  # ...: ignored; kept for backward compatibility
  #
  # Returns a levels x levels matrix with the ANOSIM R statistics in the
  # upper triangle and the adjusted p-values in the lower triangle.
  if (!requireNamespace("vegan", quietly = TRUE)) {
    stop("Package 'vegan' is required for anosim.pw", call. = FALSE)
  }
  dat <- as.matrix(dat)
  g <- factor(g)
  lev <- levels(g)
  k <- length(lev)
  ano <- matrix(NA_real_, nrow = k, ncol = k, dimnames = list(lev, lev))
  # sample indices of each group, computed once instead of inside the loops
  idx <- lapply(lev, function(lv) which(g == lv))
  # preallocated storage, in pair order (1,2), (1,3), ..., (k-1,k)
  n.pairs <- k * (k - 1L) / 2
  p.raw <- numeric(n.pairs)
  low.row <- integer(n.pairs)  # lower-triangle coordinates where the
  low.col <- integer(n.pairs)  # adjusted p-values will be written
  p <- 0L
  for (i in seq_len(k - 1)) {
    for (j in (i + 1):k) {
      p <- p + 1L
      sel <- c(idx[[i]], idx[[j]])
      grouping <- c(rep(lev[i], length(idx[[i]])),
                    rep(lev[j], length(idx[[j]])))
      fit <- vegan::anosim(as.dist(dat[sel, sel]), grouping, permutations)
      ano[i, j] <- fit$statistic
      p.raw[p] <- fit$signif
      low.row[p] <- j
      low.col[p] <- i
    }
  }
  # correct for multiple comparisons, then fill the lower triangle
  ano[cbind(low.row, low.col)] <- p.adjust(p.raw, p.adjust.method)
  return(ano)
}
### end
#23 Nov 2012
#L.Zinger
###################
#function MOTUtable
###################
# Generates ready-to-use MOTU tables and basic statistics on samples (i.e. sequencing depth, raw richness, and invsimpson index)
#input:
#x: an obitable output (samples should be indicated as e.g. "sample.A01r" in column names)
#y: the column name by which that data are to be aggregated. Should be e.g. "cluster" or "species_name"
#outputs:
#x.otu: the ready-to-use MOTU table
#x.rawstats: basic statistics on samples
### start
MOTUtable <- function(x, y) {
  # Build a ready-to-use MOTU table aggregated by `y`, plus basic per-sample
  # statistics (sequencing depth, raw richness, inverse Simpson index).
  #
  # x: an obitable data.frame (sample columns named e.g. "sample.A01r")
  # y: name of the column to aggregate by (e.g. "cluster", "species_name")
  #
  # Side effect (this is the function's interface): assigns two objects
  # into the global environment, named "<x>.<y>" and "<x>.<y>.rawstats".
  if (!requireNamespace("vegan", quietly = TRUE)) {
    stop("Package 'vegan' is required for MOTUtable", call. = FALSE)
  }
  # name of the object passed as `x`, used to name the results
  nom <- as.character(substitute(x))
  # keep the aggregation column plus all sample columns
  # NOTE(review): grep() does regex matching, so `y` may also catch columns
  # whose names merely contain `y` -- confirm column naming before use
  tmp <- x[, c(grep(y, colnames(x)), grep("sample", colnames(x)))]
  # sum reads per level of `y`, then transpose so samples become rows
  tmp2 <- t(aggregate(tmp[, -1], by = list(tmp[, 1]), sum))
  x.otu <- tmp2[-1, ]
  colnames(x.otu) <- paste(y, tmp2[1, ], sep = ".")
  # NOTE(review): t() on the aggregate result can coerce everything to
  # character when the grouping column is non-numeric, which may break
  # rowSums depending on the R version (see original author's note)
  x.rawstats <- data.frame(Nb_ind = rowSums(x.otu),
                           Raw_richness = vegan::specnumber(x.otu, MARGIN = 1),
                           Raw_eveness = vegan::diversity(x.otu, "invsimpson", MARGIN = 1))
  # `envir` spelled out: the original relied on partial matching of `env`
  assign(paste(nom, y, sep = "."), x.otu, envir = .GlobalEnv)
  assign(paste(nom, y, "rawstats", sep = "."), x.rawstats, envir = .GlobalEnv)
}
### end
#26 Nov 2012
#F.Boyer
###################
#function reads.frequency & filter.threshold
###################
#can be used to filter the table of reads to have the sequences that represents at least 95% of the total reads by sample
#
#e.g. reads.threshold(reads.frequency(metabarcodingS4Obj@reads), 0.95)
filter.threshold <- function(v, threshold) {
  # Keep the largest values of `v` whose cumulative sum first exceeds
  # `threshold`, and set every other entry to 0. If the total never exceeds
  # `threshold`, `v` is returned unchanged.
  ranked <- order(v, decreasing = TRUE)
  running <- cumsum(as.matrix(v[ranked]))
  over <- which(running > threshold)
  # position of the first value whose running total passes the threshold
  n.keep <- min(length(ranked), 1 + length(ranked) - length(over))
  v[-ranked[seq(n.keep)]] <- 0
  v
}
reads.threshold <- function(reads, threshold, by.sample = TRUE) {
  # Zero out, within each sample (row) or MOTU (column), all but the most
  # abundant entries whose cumulative total first exceeds `threshold`.
  # Intended for frequency tables, e.g.
  #   reads.threshold(reads.frequency(obj@reads), 0.95)
  #
  # reads:     matrix/data.frame of read counts or frequencies
  # threshold: cumulative threshold forwarded to filter.threshold()
  # by.sample: TRUE to filter rows (samples), FALSE to filter columns
  margin <- if (by.sample) 1 else 2
  # `threshold` spelled out: the original `thr=` relied on partial matching
  res <- apply(reads, MARGIN = margin, filter.threshold, threshold = threshold)
  # apply() returns per-row results as columns; transpose back to rows
  if (by.sample) res <- t(res)
  data.frame(res)
}
reads.frequency <- function(reads, by.sample = TRUE) {
  # Convert a table of read counts into relative frequencies, per sample
  # (row) or per MOTU (column).
  #
  # reads:     matrix/data.frame of read counts
  # by.sample: TRUE to normalise rows, FALSE to normalise columns
  #
  # Returns a data.frame with the same layout as `reads`.
  margin <- if (by.sample) 1 else 2
  res <- apply(reads, MARGIN = margin, function(v) v / sum(v))
  # apply() returns per-row results as columns; transpose back to rows
  if (by.sample) res <- t(res)
  data.frame(res)
}
#06 Jan 2013
#F.Boyer
###################
#function removeOutliers
###################
#given a contingency table and a distance matrix
#returns the list of samples that should be removed in order to have only
#distances below the threshold
#can't return only one sample
#
#e.g. intraBad <- lapply(levels(sample.desc$sampleName), function(group) {samples<-rownames(sample.desc)[sample.desc$sampleName==group]; removeOutliers(contingencyTable[samples,], thr=0.3, distFun = function(x) vegdist(x, method='bray'))})
#require(vegan)
removeOutliers <- function(m, thr=0.3, distFun = function(x) vegdist(x, method='bray') ) {
# Iteratively drop samples until all pairwise distances are <= thr.
# Returns the rownames of the dropped samples; if the removal would leave at
# most one sample, all rownames of m are returned instead ("can't return
# only one sample").
# m:       contingency table, samples as rows
# thr:     maximum tolerated pairwise distance
# distFun: function producing a dist object from m; the default needs
#          vegan to be attached by the caller (vegdist is not qualified)
distMat <- as.matrix(distFun(m))
maxM <- max(distMat)
theBadGuys =c()
while (maxM>thr) {
# flag every sample involved in a pair at the current maximum distance
# (exact == comparison is safe here: maxM comes from this same matrix)
bad <- apply(distMat, MARGIN=1, function(row, maxM) {any(row==maxM)}, maxM=maxM)
bad <- names(bad)[bad]
# mean distance of each flagged sample to all remaining samples
# NOTE(review): assumes at least two rows are flagged (true for a symmetric
# distance matrix, where the max occurs in two rows); a single flagged row
# would drop to a vector and make apply() fail
bad <- apply(distMat[bad,], MARGIN=1, mean)
# remove the flagged sample with the worst mean distance (ties: first name)
badGuy <- names(bad)[bad==max(bad), drop=F][1]
theBadGuys <- c(theBadGuys, badGuy)
stillok <- rownames(distMat) != badGuy
distMat <- distMat[stillok, stillok, drop=F]
maxM <- max(distMat)
}
# if at most one sample survived, report the whole group as outliers
if (length(theBadGuys) >= (nrow(m)-1)) {
theBadGuys <- rownames(m)
}
theBadGuys
}
#31.05.2013
#L.Zinger
#getAttrPerS, a function allowing to get the values of a sequence attribute per sample
#(e.g. best_identities, etc...) the output is a list with one dataframe per sample.
#This dataframe contains:
# first column (named as attr): the attribute value for each sequence present in the sample
# second column (named weight): the corresponding number of reads in the sample
getAttrPerS <- function(x, attr) {
  # Collect, for every sample, the values of one MOTU attribute together
  # with the read counts of the MOTUs present in that sample.
  #
  # x:    a metabarcoding.data S4 object
  # attr: character; name of the attribute column in motus(x)
  #       (e.g. "best_identity"); matched with grep(), so a name that is a
  #       prefix/substring of other columns may match several of them --
  #       TODO confirm the exact column name before calling
  #
  # Returns a list with one two-column matrix per sample:
  #   column 1 (named after `attr`): attribute value of each MOTU present
  #   column 2 ("weight"):           corresponding read count in the sample
  if (!inherits(x, "metabarcoding.data")) {
    stop("x is not a metabarcoding S4 object")
  }
  if (!is.character(attr)) {
    stop("attr is not a character object")
  }
  x.motus <- motus(x)
  x.reads <- reads(x)
  attr.col <- grep(attr, colnames(x.motus))
  # iterate over samples explicitly: the original apply()/mapply() pipeline
  # silently simplified to matrices/arrays whenever all samples had the same
  # number of non-zero MOTUs, which broke the downstream lapply()
  output <- lapply(seq_len(nrow(x.reads)), function(i) {
    y <- x.reads[i, ]
    present <- which(y != 0)
    vals <- x.motus[match(names(y)[present], x.motus$id), attr.col]
    res <- cbind(vals, y[present])
    colnames(res) <- c(attr, "weight")
    res
  })
  names(output) <- rownames(x.reads)
  return(output)
}
### end getAttrPerS