163 lines
5.3 KiB
R
163 lines
5.3 KiB
R
#'@include ROBIBarcodes.R
|
|
NULL
|
|
|
|
# column 1 : accession number
|
|
# column 2 : sequence length
|
|
# column 3 : taxonomic id
|
|
# column 4 : rank
|
|
# column 5 : species taxonomic id
|
|
# column 6 : scientific name
|
|
# column 7 : genus taxonomic id
|
|
# column 8 : genus name
|
|
# column 9 : family taxonomic id
|
|
# column 10 : family name
|
|
# column 11 : super kingdom taxonomic id
|
|
# column 12 : super kingdom name
|
|
# column 13 : strand (direct or reverse)
|
|
# column 14 : first oligonucleotide
|
|
# column 15 : number of errors for the first strand
|
|
# column 16 : Tm for hybridization of primer 1 at this site
|
|
# column 17 : second oligonucleotide
|
|
# column 18 : number of errors for the second strand
|
|
# column 19 : Tm for hybridization of primer 1 at this site
|
|
# column 20 : amplification length
|
|
# column 21 : sequence
|
|
# column 22 : definition
|
|
|
|
#' Read the result file produced by the ecoPCR program.
|
|
#'
|
|
#' @export
|
|
read.ecopcr.result = function(file)
|
|
{
|
|
|
|
split.line = function(line) {
|
|
l = strsplit(line,split=" +\\| +")[[1]]
|
|
l = c(l[1:21],paste(l[-c(1:21)],sep="|"))
|
|
return(l)
|
|
}
|
|
|
|
if (missing(file) && !missing(text)) {
|
|
file <- textConnection(text)
|
|
on.exit(close(file))
|
|
}
|
|
if (is.character(file)) {
|
|
file <- file(file, "rt")
|
|
on.exit(close(file))
|
|
}
|
|
if (!inherits(file, "connection"))
|
|
stop("'file' must be a character string or connection")
|
|
if (!isOpen(file, "rt")) {
|
|
open(file, "rt")
|
|
on.exit(close(file))
|
|
}
|
|
|
|
line = readLines(file,1)
|
|
while (length(grep('^#',line))==1) {
|
|
line = readLines(file,1)
|
|
}
|
|
|
|
pushBack(line,file)
|
|
|
|
lines = lapply(readLines(file),split.line)
|
|
nlines = length(lines)
|
|
AC = sapply(1:nlines,function(x) lines[[x]][1])
|
|
seq_length = as.integer(sapply(1:nlines,function(x) lines[[x]][2]))
|
|
taxid = as.integer(sapply(1:nlines,function(x) lines[[x]][3]))
|
|
rank = as.factor(sapply(1:nlines,function(x) lines[[x]][4]))
|
|
species = type.convert(sapply(1:nlines,function(x) lines[[x]][5]),na.string="###")
|
|
species_name = sapply(1:nlines,function(x) lines[[x]][6])
|
|
genus = type.convert(sapply(1:nlines,function(x) lines[[x]][7]),na.string="###")
|
|
genus_name = sapply(1:nlines,function(x) lines[[x]][8])
|
|
family = type.convert(sapply(1:nlines,function(x) lines[[x]][9]),na.string="###")
|
|
family_name = sapply(1:nlines,function(x) lines[[x]][10])
|
|
superkingdom = type.convert(sapply(1:nlines,function(x) lines[[x]][11]),na.string="###")
|
|
superkingdom_name = sapply(1:nlines,function(x) lines[[x]][12])
|
|
strand = as.factor(sapply(1:nlines,function(x) lines[[x]][13]))
|
|
forward_match = sapply(1:nlines,function(x) lines[[x]][14])
|
|
forward_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][15]))
|
|
forward_tm = as.double(sapply(1:nlines,function(x) lines[[x]][16]))
|
|
reverse_match = sapply(1:nlines,function(x) lines[[x]][17])
|
|
reverse_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][18]))
|
|
reverse_tm = as.double(sapply(1:nlines,function(x) lines[[x]][19]))
|
|
amplicon_length = as.integer(sapply(1:nlines,function(x) lines[[x]][20]))
|
|
sequence = sapply(1:nlines,function(x) lines[[x]][21])
|
|
definition = sapply(1:nlines,function(x) lines[[x]][22])
|
|
|
|
eco = data.frame(AC,seq_length,taxid,rank,
|
|
species,species_name,
|
|
genus,genus_name,
|
|
family,family_name,
|
|
superkingdom,superkingdom_name,
|
|
strand,
|
|
forward_match,forward_mismatch,forward_tm,
|
|
reverse_match,reverse_mismatch,reverse_tm,
|
|
amplicon_length,sequence,definition
|
|
)
|
|
|
|
return(eco)
|
|
}
|
|
|
|
ecopcr.frequencies = function(matches,group=NULL) {
|
|
compute = function(matches) {
|
|
w = as.matrix(do.call(rbind,strsplit(as.character(matches),'')))
|
|
d = dim(w)
|
|
w=factor(w,levels=c('A','C','G','T'))
|
|
dim(w)=d
|
|
w=t(w)
|
|
freq = mapply(function(x) table(w[x,]),1:d[2])
|
|
freq = freq[c('A','C','G','T'),]
|
|
csum = colSums(freq)
|
|
freq = sweep(freq,2,csum,'/')
|
|
attr(freq,'count')=length(w)
|
|
return(freq)
|
|
}
|
|
if (is.null(group))
|
|
return(compute(matches))
|
|
else {
|
|
lmatches = aggregate(matches,by=list(group=as.factor(group)),as.character)
|
|
w = lmatches$x
|
|
names(w)=lmatches$group
|
|
lf = lapply(w,compute)
|
|
return(lf)
|
|
}
|
|
}
|
|
|
|
#' @export
|
|
ecopcr.forward.frequencies = function(ecopcr,group=NULL) {
|
|
return(ecopcr.frequencies(ecopcr$forward_match,group))
|
|
}
|
|
|
|
#' @export
|
|
ecopcr.reverse.frequencies = function(ecopcr,group=NULL) {
|
|
return(ecopcr.frequencies(ecopcr$reverse_match,group))
|
|
}
|
|
|
|
#' @export
|
|
dna.shanon = function(freq,base=2) {
|
|
shanon = log(4)/log(base) - colSums(-freq *log(freq) / log(base),na.rm=TRUE)
|
|
return(sweep(freq,2,shanon,'*'))
|
|
}
|
|
|
|
|
|
ecopcr.shanon = function(matches,group=NULL,base=2) {
|
|
|
|
if (is.null(group)) {
|
|
freq = ecopcr.frequencies(matches)
|
|
return(dna.shanon(freq))
|
|
}
|
|
else {
|
|
lf = lapply(ecopcr.frequencies(matches,group),dna.shanon)
|
|
return(lf)
|
|
}
|
|
}
|
|
|
|
#' @export
|
|
ecopcr.forward.shanon = function(ecopcr,group=NULL,base=2) {
|
|
return(ecopcr.shanon(ecopcr$forward_match,group,base))
|
|
}
|
|
|
|
#' @export
|
|
ecopcr.reverse.shanon = function(ecopcr,group=NULL,base=2) {
|
|
return(ecopcr.shanon(ecopcr$reverse_match,group,base))
|
|
}
|