#' @include read.obitab.R
#' @include 02_class_metabarcoding.data.R
NULL

#' Read a data file produced by the \code{obitab} command
#'
#' Read a data file issued from the conversion of a \strong{fasta}
#' file to a tabular file by the \code{obitab} command of the
#' \strong{OBITools} package.
#'
#' The table is loaded with \code{read.obitab}. Columns whose name matches
#' the sample pattern are taken as per-sample abundances; all the other
#' columns are kept as the description of the MOTUs. The \code{id} column
#' of the table is used to name the MOTUs.
#'
#' @param file a string containing the file name of the obitab file.
#' @param sep Column separator in the obitab file.
#'            The default separator is the tabulation.
#' @param sample A regular expression allowing to identify columns
#'            from the file describing abundances of sequences per sample.
#' @param sample.sep Separator between combined sample names
#'            (used as a regular expression).
#' @param attribute Separator used to split between the sample 'tag' and
#'            the sample name in the column names.
#'
#' @return a \code{\link{metabarcoding.data}} instance. An error is raised
#'         when no column of the file matches the sample pattern.
#'
#' @examples
#' library(ROBITools)
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
#'
#' # read the termes.tab file
#' termes=import.metabarcoding.data('termes.tab')
#'
#' # print the number of samples and motus described in the file
#' dim(termes)
#'
#' @seealso \code{\link{metabarcoding.data}}
#'
#' @author Eric Coissac
#' @keywords DNA metabarcoding
#' @export
#'
import.metabarcoding.data <- function(file, sep = '\t', sample = "sample",
                                      sample.sep = "\\.", attribute = ":") {

  tab <- read.obitab(file, sep = sep)

  # get the indices of the columns matching the sample pattern
  pat <- paste0('(^|', sample.sep, ')', sample, '[', sample.sep, attribute, ']')
  scol <- grep(pat, colnames(tab))

  # Fail early: with no match, the negative indexing used below would
  # silently select zero columns instead of "everything but the samples".
  if (length(scol) == 0) {
    stop("no column matches the sample pattern '", pat, "'", call. = FALSE)
  }

  # read counts per sample; drop = FALSE keeps a data.frame (and therefore
  # the column names) even when a single sample column is present
  reads <- tab[, scol, drop = FALSE]
  name.parts <- strsplit(colnames(reads), split = attribute)

  # for the sample name just remove the first part of the column name,
  # usually "sample:"
  sample.names <- vapply(name.parts,
                         function(a) paste(a[-1], collapse = attribute),
                         character(1))

  # samples in rows, MOTUs in columns
  reads <- t(reads)
  rownames(reads) <- sample.names

  # sample description: one row per sample, built from the pieces of the
  # sample name; the column is named after the tag of the first sample column
  sample.data <- data.frame(t(data.frame(strsplit(sample.names,
                                                  split = attribute))))
  rownames(sample.data) <- sample.names
  colnames(sample.data) <- strsplit(name.parts[[1]][1], split = attribute)

  # MOTU description: every column that is not a sample column;
  # drop = FALSE keeps a data.frame even if only one column is left
  motus <- tab[, -scol, drop = FALSE]
  motus.id <- motus$id
  rownames(motus) <- motus.id
  colnames(reads) <- motus.id

  return(metabarcoding.data(reads, sample.data, motus))
}

# Legacy scratch code kept for reference (not executed):
#
#pcr = gh[,grep('^sample',colnames(gh))]
#pcr.names = colnames(pcr)
#pcr.names = sub('sample\\.','',pcr.names)
#sequencer = rep('Solexa',length(pcr.names))
#sequencer[grep('454',pcr.names)]='454'
#sequencer=factor(sequencer)
#
#tmp = strsplit(pcr.names,'\\.[A-Z](sol|454)\\.')
#
#sample = sapply(tmp,function(x) x[1])
#locality = factor(sapply(strsplit(sample,'_'),function(x) x[1]))
#sample = factor(sample)
#repeats= factor(sapply(tmp,function(x) x[2]))
#
#tmp = regexpr('[A-Z](454|sol)',pcr.names)
#run=factor(substr(pcr.names,tmp,tmp+attr(tmp,"match.length")-1))
#
#pcr.metadata = data.frame(run,sequencer,locality,sample,repeats)
#
#rownames(pcr.metadata)=pcr.names