Files
ROBIBarcodes/R/ecopcr.R
2016-01-13 10:23:54 +01:00

163 lines
5.3 KiB
R

#'@include ROBIBarcodes.R
NULL
# column 1 : accession number
# column 2 : sequence length
# column 3 : taxonomic id
# column 4 : rank
# column 5 : species taxonomic id
# column 6 : scientific name
# column 7 : genus taxonomic id
# column 8 : genus name
# column 9 : family taxonomic id
# column 10 : family name
# column 11 : super kingdom taxonomic id
# column 12 : super kingdom name
# column 13 : strand (direct or reverse)
# column 14 : first oligonucleotide
# column 15 : number of errors for the first strand
# column 16 : Tm for hybridization of primer 1 at this site
# column 17 : second oligonucleotide
# column 18 : number of errors for the second strand
# column 19 : Tm for hybridization of primer 1 at this site
# column 20 : amplification length
# column 21 : sequence
# column 22 : definition
#' Read the result file produced by the ecoPCR program.
#'
#' @export
read.ecopcr.result = function(file)
{
split.line = function(line) {
l = strsplit(line,split=" +\\| +")[[1]]
l = c(l[1:21],paste(l[-c(1:21)],sep="|"))
return(l)
}
if (missing(file) && !missing(text)) {
file <- textConnection(text)
on.exit(close(file))
}
if (is.character(file)) {
file <- file(file, "rt")
on.exit(close(file))
}
if (!inherits(file, "connection"))
stop("'file' must be a character string or connection")
if (!isOpen(file, "rt")) {
open(file, "rt")
on.exit(close(file))
}
line = readLines(file,1)
while (length(grep('^#',line))==1) {
line = readLines(file,1)
}
pushBack(line,file)
lines = lapply(readLines(file),split.line)
nlines = length(lines)
AC = sapply(1:nlines,function(x) lines[[x]][1])
seq_length = as.integer(sapply(1:nlines,function(x) lines[[x]][2]))
taxid = as.integer(sapply(1:nlines,function(x) lines[[x]][3]))
rank = as.factor(sapply(1:nlines,function(x) lines[[x]][4]))
species = type.convert(sapply(1:nlines,function(x) lines[[x]][5]),na.string="###")
species_name = sapply(1:nlines,function(x) lines[[x]][6])
genus = type.convert(sapply(1:nlines,function(x) lines[[x]][7]),na.string="###")
genus_name = sapply(1:nlines,function(x) lines[[x]][8])
family = type.convert(sapply(1:nlines,function(x) lines[[x]][9]),na.string="###")
family_name = sapply(1:nlines,function(x) lines[[x]][10])
superkingdom = type.convert(sapply(1:nlines,function(x) lines[[x]][11]),na.string="###")
superkingdom_name = sapply(1:nlines,function(x) lines[[x]][12])
strand = as.factor(sapply(1:nlines,function(x) lines[[x]][13]))
forward_match = sapply(1:nlines,function(x) lines[[x]][14])
forward_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][15]))
forward_tm = as.double(sapply(1:nlines,function(x) lines[[x]][16]))
reverse_match = sapply(1:nlines,function(x) lines[[x]][17])
reverse_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][18]))
reverse_tm = as.double(sapply(1:nlines,function(x) lines[[x]][19]))
amplicon_length = as.integer(sapply(1:nlines,function(x) lines[[x]][20]))
sequence = sapply(1:nlines,function(x) lines[[x]][21])
definition = sapply(1:nlines,function(x) lines[[x]][22])
eco = data.frame(AC,seq_length,taxid,rank,
species,species_name,
genus,genus_name,
family,family_name,
superkingdom,superkingdom_name,
strand,
forward_match,forward_mismatch,forward_tm,
reverse_match,reverse_mismatch,reverse_tm,
amplicon_length,sequence,definition
)
return(eco)
}
ecopcr.frequencies = function(matches,group=NULL) {
compute = function(matches) {
w = as.matrix(do.call(rbind,strsplit(as.character(matches),'')))
d = dim(w)
w=factor(w,levels=c('A','C','G','T'))
dim(w)=d
w=t(w)
freq = mapply(function(x) table(w[x,]),1:d[2])
freq = freq[c('A','C','G','T'),]
csum = colSums(freq)
freq = sweep(freq,2,csum,'/')
attr(freq,'count')=length(w)
return(freq)
}
if (is.null(group))
return(compute(matches))
else {
lmatches = aggregate(matches,by=list(group=as.factor(group)),as.character)
w = lmatches$x
names(w)=lmatches$group
lf = lapply(w,compute)
return(lf)
}
}
#' @export
ecopcr.forward.frequencies = function(ecopcr,group=NULL) {
return(ecopcr.frequencies(ecopcr$forward_match,group))
}
#' @export
ecopcr.reverse.frequencies = function(ecopcr,group=NULL) {
return(ecopcr.frequencies(ecopcr$reverse_match,group))
}
#' @export
dna.shanon = function(freq,base=2) {
shanon = log(4)/log(base) - colSums(-freq *log(freq) / log(base),na.rm=TRUE)
return(sweep(freq,2,shanon,'*'))
}
ecopcr.shanon = function(matches,group=NULL,base=2) {
if (is.null(group)) {
freq = ecopcr.frequencies(matches)
return(dna.shanon(freq))
}
else {
lf = lapply(ecopcr.frequencies(matches,group),dna.shanon)
return(lf)
}
}
#' @export
ecopcr.forward.shanon = function(ecopcr,group=NULL,base=2) {
return(ecopcr.shanon(ecopcr$forward_match,group,base))
}
#' @export
ecopcr.reverse.shanon = function(ecopcr,group=NULL,base=2) {
return(ecopcr.shanon(ecopcr$reverse_match,group,base))
}