Files
ROBITools/R/import.metabarcoding.R
2018-02-20 06:40:29 +11:00

107 lines
3.0 KiB
R

#' @include read.obitab.R
#' @include 02_class_metabarcoding.data.R
NULL
#' Read a data file produced by the \code{obitab} command
#'
#' Read a data file issued from the conversion of a \strong{fasta}
#' file to a tabular file by the \code{obitab} command of the
#' \strong{OBITools} package
#'
#' @param file a string containing the file name of the obitab file.
#' @param sep Column separator in the obitab file.
#' The default separator is the tabulation.
#' @param sample A regular expression used to identify the columns
#' of the file that describe sequence abundances per sample.
#' @param sample.sep Separator between combined sample names; it is used
#' as a regular expression fragment when matching column names.
#' @param attribute Separator used to split between sample 'tag' and sample name.
#'
#' @return a \code{\link{metabarcoding.data}} instance
#'
#' @examples
#' require(ROBITools)
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
#'
#' # read the termes.tab file
#' termes=import.metabarcoding.data('termes.tab')
#'
#' # print the number of samples and motus described in the file
#' dim(termes)
#'
#' @seealso \code{\link{metabarcoding.data}}
#'
#' @author Eric Coissac
#' @keywords DNA metabarcoding
#' @export
#'
import.metabarcoding.data <- function(file, sep = '\t', sample = "sample",
                                      sample.sep = "\\.", attribute = ":") {
  # Load the obitab table; each row describes one MOTU (sequence).
  obitab <- read.obitab(file, sep = sep)

  # Locate the columns whose name matches the sample pattern, i.e. the
  # `sample` tag placed at the start of the name (or after `sample.sep`)
  # and followed by either `sample.sep` or `attribute`.
  all.columns <- colnames(obitab)
  pattern <- paste0('(^|', sample.sep, ')', sample,
                    '[', sample.sep, attribute, ']')
  sample.cols <- grep(pattern, all.columns)

  # Fail early with an explicit message instead of an obscure
  # "subscript out of bounds" further down.
  if (length(sample.cols) == 0) {
    stop("no column name matches the sample pattern '", pattern, "'",
         call. = FALSE)
  }

  # Read counts per sample. `drop = FALSE` keeps a data.frame even when a
  # single column matches, so that the column names are preserved.
  reads <- obitab[, sample.cols, drop = FALSE]
  name.parts <- strsplit(colnames(reads), split = attribute)

  # For the sample name just remove the first part of the column name,
  # usually "sample:", and glue the remaining parts back together.
  sample.names <- vapply(name.parts,
                         function(a) paste(a[-1], collapse = attribute),
                         character(1))

  # Samples become rows, MOTUs become columns.
  reads <- t(reads)
  rownames(reads) <- sample.names

  # Sample description: one row per sample, one column per component of
  # the sample name once split on `attribute`.
  sample.data <- data.frame(t(data.frame(strsplit(sample.names,
                                                  split = attribute))))
  rownames(sample.data) <- sample.names
  # The column is labelled with the tag found in front of the first
  # sample column name.
  colnames(sample.data) <- strsplit(name.parts[[1]][1], split = attribute)

  # MOTU description: every column that is not a sample column.
  # `drop = FALSE` keeps a data.frame when only one such column remains.
  motus <- obitab[, -sample.cols, drop = FALSE]
  motus.id <- motus$id
  rownames(motus) <- motus.id
  colnames(reads) <- motus.id

  metabarcoding.data(reads, sample.data, motus)
}
#pcr = gh[,grep('^sample',colnames(gh))]
#pcr.names = colnames(pcr)
#pcr.names = sub('sample\\.','',pcr.names)
#sequencer = rep('Solexa',length(pcr.names))
#sequencer[grep('454',pcr.names)]='454'
#sequencer=factor(sequencer)
#
#tmp = strsplit(pcr.names,'\\.[A-Z](sol|454)\\.')
#
#sample = sapply(tmp,function(x) x[1])
#locality = factor(sapply(strsplit(sample,'_'),function(x) x[1]))
#sample = factor(sample)
#repeats= factor(sapply(tmp,function(x) x[2]))
#
#tmp = regexpr('[A-Z](454|sol)',pcr.names)
#run=factor(substr(pcr.names,tmp,tmp+attr(tmp,"match.length")-1))
#
#pcr.metadata = data.frame(run,sequencer,locality,sample,repeats)
#
#rownames(pcr.metadata)=pcr.names