207 lines
5.8 KiB
R
207 lines
5.8 KiB
R
#' @include 02_class_metabarcoding.data.R
|
|
NULL
|
|
|
|
#11.03.2011
|
|
#L.Zinger
|
|
|
|
#######################
|
|
#function anosim.pw
|
|
#######################
|
|
#computes pairwise anosim computation
|
|
#input:
|
|
#dat: dissimilarity matrix
|
|
#g: factor defining the grouping to test
|
|
#permutations: nb of permutation to access anosim statistics
|
|
#p.adjust.method: method of correction for multiple-testing
|
|
#
|
|
#output: a distance-like table containing:
|
|
#in the upper triangle: the anosims R values
|
|
#in the lower triangle: the adjusted p-values
|
|
|
|
|
|
### start
|
|
|
|
anosim.pw<-function(dat, g, permutations, p.adjust.method, ...) {
|
|
require(vegan)
|
|
#data.trasformation
|
|
dat<-as.matrix(dat)
|
|
g<-factor(g)
|
|
|
|
#empty object for result storage
|
|
ano<-matrix(NA, nrow=nlevels(g), ncol=nlevels(g), dimnames=list(levels(g),levels(g)))
|
|
p.val.tmp<-NULL
|
|
#running anosims
|
|
for(i in 1:(nlevels(g)-1)) for(j in (i+1):nlevels(g)){
|
|
tmp<-anosim(as.dist(dat[c(which(g==levels(g)[i]),which(g==levels(g)[j])),
|
|
c(which(g==levels(g)[i]),which(g==levels(g)[j]))]),
|
|
c(rep(levels(g)[i], length(which(g==levels(g)[i]))),
|
|
rep(levels(g)[j], length(which(g==levels(g)[j])))), permutations)
|
|
ano[i,j]<-tmp$statistic
|
|
p.val.tmp<-append(p.val.tmp, tmp$signif)
|
|
}
|
|
|
|
#p value correction for multiple comparison
|
|
p.val.tmp<-p.adjust(p.val.tmp, p.adjust.method )
|
|
|
|
#put the corrected p values in the anosim table
|
|
tmp<-NULL
|
|
tmp2<-NULL
|
|
for(i in 1:(nlevels(g)-1)) for(j in (i+1):nlevels(g)){
|
|
tmp<-append(tmp,i)
|
|
tmp2<-append(tmp2,j)
|
|
}
|
|
for(i in 1:length(p.val.tmp)){
|
|
ano[tmp2[i],tmp[i]]<-p.val.tmp[i]}
|
|
|
|
return(ano)
|
|
}
|
|
|
|
### end
|
|
|
|
|
|
|
|
|
|
#23 Nov 2012
|
|
#L.Zinger
|
|
###################
|
|
#function MOTUtable
|
|
###################
|
|
# Generates ready-to-use MOTU tables and basic statistics on samples (i.e. sequencing depth, raw richness, and invsimpson index)
|
|
#input:
|
|
#x: an obitable output (samples should be indicated as e.g. "sample.A01r" in column names)
|
|
#y: the column name by which that data are to be aggregated. Should be e.g. "cluster" or "species_name"
|
|
#outputs:
|
|
#x.otu: the ready-to-use MOTU table
|
|
#x.rawstats: basic statistics on samples
|
|
|
|
### start
|
|
|
|
MOTUtable<-function(x, y) {
|
|
|
|
require(vegan)
|
|
nom<-as.character(substitute(x))
|
|
|
|
tmp<-x[,c(grep(y, colnames(x)), grep("sample", colnames(x)))]
|
|
tmp2<-t(aggregate(tmp[,-1], by=list(tmp[,1]), sum))
|
|
x.otu<-tmp2[-1,]
|
|
colnames(x.otu)<-paste(y,tmp2[1,], sep=".")
|
|
|
|
x.rawstats<-data.frame(Nb_ind=rowSums(x.otu), Raw_richness=specnumber(x.otu, MARGIN=1), Raw_eveness=diversity(x.otu, "invsimpson", MARGIN=1) )
|
|
#may have a pb in the rowSums depending on the R version (allows or not non-numeric)
|
|
|
|
assign(paste(nom, y, sep="."),x.otu,env = .GlobalEnv)
|
|
assign(paste(nom, y, "rawstats", sep="."),x.rawstats,env = .GlobalEnv)
|
|
}
|
|
|
|
### end
|
|
|
|
|
|
|
|
|
|
#26 Nov 2012
|
|
#F.Boyer
|
|
###################
|
|
#function reads.frequency & filter.threshold
|
|
###################
|
|
#can be used to filter the table of reads to have the sequences that represents at least 95% of the total reads by sample
|
|
#
|
|
#e.g. reads.treshold(reads.frequency(metabarcodingS4Obj@reads), 0.95)
|
|
|
|
|
|
filter.threshold <- function(v, threshold) {
|
|
o <- order(v, decreasing=T)
|
|
ind <- which(cumsum(as.matrix(v[o]))>threshold)
|
|
v[-o[seq(min(length(o), 1+length(o)-length(ind)))]] <- 0
|
|
v
|
|
}
|
|
|
|
reads.threshold <- function (reads, threshold, by.sample=T) {
|
|
res <- apply(reads, MARGIN=ifelse(by.sample, 1, 2), filter.threshold, thr=threshold)
|
|
if (by.sample) res <- t(res)
|
|
data.frame(res)
|
|
}
|
|
|
|
reads.frequency <- function (reads, by.sample=T) {
|
|
res <- apply(reads, MARGIN=ifelse(by.sample, 1, 2), function(v) {v/sum(v)})
|
|
if (by.sample) res <- t(res)
|
|
data.frame(res)
|
|
}
|
|
|
|
|
|
#06 Jan 2013
|
|
#F.Boyer
|
|
###################
|
|
#function removeOutliers
|
|
###################
|
|
#given a contengency table and a distance matrix
|
|
#returns the list of samples that should be removed in order to have only
|
|
#distances below thresold
|
|
#can't return only one sample
|
|
#
|
|
#e.g. intraBad <- lapply(levels(sample.desc$sampleName), function(group) {samples<-rownames(sample.desc)[sample.desc$sampleName==group]; removeOutliers(contingencyTable[samples,], thr=0.3, distFun = function(x) vegdist(x, method='bray'))})
|
|
|
|
|
|
|
|
#require(vegan)
|
|
removeOutliers <- function(m, thr=0.3, distFun = function(x) vegdist(x, method='bray') ) {
|
|
distMat <- as.matrix(distFun(m))
|
|
maxM <- max(distMat)
|
|
theBadGuys =c()
|
|
|
|
while (maxM>thr) {
|
|
bad <- apply(distMat, MARGIN=1, function(row, maxM) {any(row==maxM)}, maxM=maxM)
|
|
bad <- names(bad)[bad]
|
|
bad <- apply(distMat[bad,], MARGIN=1, mean)
|
|
badGuy <- names(bad)[bad==max(bad), drop=F][1]
|
|
|
|
theBadGuys <- c(theBadGuys, badGuy)
|
|
|
|
stillok <- rownames(distMat) != badGuy
|
|
distMat <- distMat[stillok, stillok, drop=F]
|
|
maxM <- max(distMat)
|
|
}
|
|
|
|
if (length(theBadGuys) >= (nrow(m)-1)) {
|
|
theBadGuys <- rownames(m)
|
|
}
|
|
theBadGuys
|
|
}
|
|
|
|
|
|
#31.05.2013
|
|
#L.Zinger
|
|
#getAttrPerS, a function allowing to get the values of a sequence attribute per sample
|
|
#(e.g. best_identities, etc...) the output is a list with one dataframe per sample.
|
|
#This dataframe contains:
|
|
# first column (named as attr): the attribute value for each sequence present in the sample
|
|
# second column (named weight): the corresponding number of reads in the sample
|
|
|
|
getAttrPerS=function(x,attr){
|
|
#x: a metabarcoding object
|
|
#attr: a character object corresponding to the attribute
|
|
#for which values per sample are needed (should be equal to a colname in x@motus)
|
|
|
|
if(class(x)[1]!= "metabarcoding.data") {
|
|
stop("x is not a metabarcoding S4 object")
|
|
}
|
|
|
|
if(is.character(attr)==F) {
|
|
stop("attr is not a character object")
|
|
}
|
|
|
|
x.motus = motus(x)
|
|
x.reads = reads(x)
|
|
|
|
otu = apply(x.reads, 1, function(y) x.motus[match(names(y[which(y!=0)]),x.motus$id), grep(attr, colnames(x.motus))])
|
|
reads = apply(x.reads, 1, function(y) y[which(y!=0)])
|
|
|
|
output = mapply(cbind, otu, reads)
|
|
output = lapply(output, function(y) {
|
|
colnames(y)=c(attr,"weight")
|
|
return(y)
|
|
})
|
|
return(output)
|
|
}
|
|
### end getAttrPerS
|
|
|