# Build the package datasets from the raw tab-separated tables in RawData/.
#
# Objects saved with usethis::use_data():
#   positive.samples, positive.motus, positive.count, plants.16
#   positive.clean.samples, positive.clean.motus, positive.clean.count
#   guiana.samples, guiana.motus, guiana.count

# Split a positive-control table into its count, MOTU and sample parts.
#
# @param raw data.frame as returned by read.delim(). It must hold the columns
#   id, dilution, species_name, taxid, true and sequence, plus one count
#   column per sample whose name starts with "sample.".
# @return A list with three elements:
#   count   - matrix of counts, one row per sample, one column per MOTU id;
#   motus   - data.frame (dilution, species, taxid, true), one row per MOTU;
#   samples - data.frame (dilution, repeats), one row per sample.
split_positive_table <- function(raw) {
  info_cols <- c("id", "dilution", "species_name", "taxid", "true", "sequence")
  count_cols <- grep("^sample\\.", names(raw), value = TRUE)

  # drop = FALSE keeps the matrix shape (and its row name) with a single sample.
  count <- t(raw[, count_cols, drop = FALSE])

  # Selecting by name fails early if an expected annotation column is missing.
  info <- as.data.frame(raw[, info_cols])
  motus <- data.frame(
    dilution = as.numeric(info$dilution) / 2,
    species = as.character(info$species_name),
    taxid = as.integer(info$taxid),
    true = info$true == "True"
  )

  # Sample names are "_"-separated; once split they look like:
  #      [,1]        [,2]  [,3]  [,4] [,5] [,6]
  # [1,] "sample.TM" "POS" "d16" "1"  "a"  "A1"
  # [2,] "sample.TM" "POS" "d16" "1"  "a"  "B1"
  # [3,] "sample.TM" "POS" "d16" "1"  "b"  "A2"
  # [4,] "sample.TM" "POS" "d16" "1"  "b"  "B2"
  # [5,] "sample.TM" "POS" "d16" "2"  "a"  "A1"
  # [6,] "sample.TM" "POS" "d16" "2"  "a"  "B1"
  sample_names <- rownames(count)
  fields <- t(simplify2array(strsplit(sample_names, split = "_")))
  fields <- as.data.frame(fields[, 3:6, drop = FALSE])
  names(fields) <- c("dilution", "repeats", "PCR", "Plate")

  # The dilution field is "d<number>"; strip the leading "d" before converting.
  # NOTE(review): 32 looks like the reference dilution factor - confirm.
  dilution_tag <- as.integer(substr(as.character(fields$dilution), 2, 10))
  samples <- data.frame(
    dilution = 32 %/% dilution_tag,
    repeats = interaction(fields[, 2:4], drop = TRUE)
  )

  rownames(samples) <- sample_names
  rownames(motus) <- raw$id
  colnames(count) <- raw$id

  list(count = count, motus = motus, samples = samples)
}

#
# Positive controls
#

positive <- read.delim(
  "RawData/positifs.uniq.annotated.txt",
  sep = "\t",
  header = TRUE
)

positive.parts <- split_positive_table(positive)
positive.count <- positive.parts[["count"]]
positive.motus <- positive.parts[["motus"]]
positive.samples <- positive.parts[["samples"]]

# Keep only the MOTUs flagged as true, as (species, taxid, dilution), sorted by
# decreasing dilution value. The dilution is then replaced by 2^-rank and its
# log10 is stored alongside.
plants.16 <- positive.motus[positive.motus$true, ][, c(2, 3, 1)]
plants.16 <- plants.16[order(1 / plants.16$dilution), ]
plants.16$log10.dilution <- -seq_len(nrow(plants.16)) / log(10) * log(2)
plants.16$dilution <- 1 / (2^seq_len(nrow(plants.16)))

usethis::use_data(positive.samples, overwrite = TRUE)
usethis::use_data(positive.motus, overwrite = TRUE)
usethis::use_data(positive.count, overwrite = TRUE)
usethis::use_data(plants.16, overwrite = TRUE)

#
# Positive controls, cleaned table
#

positive.clean <- read.delim(
  "RawData/positifs.uniq.annotated.clean.txt",
  sep = "\t",
  header = TRUE
)

positive.clean.parts <- split_positive_table(positive.clean)
positive.clean.count <- positive.clean.parts[["count"]]
positive.clean.motus <- positive.clean.parts[["motus"]]
positive.clean.samples <- positive.clean.parts[["samples"]]

usethis::use_data(positive.clean.samples, overwrite = TRUE)
usethis::use_data(positive.clean.motus, overwrite = TRUE)
usethis::use_data(positive.clean.count, overwrite = TRUE)

#
# Litter/Soil dataset
#

guiana <- read.delim(
  "RawData/litiere_ins_cl97_agg_filt_tax.tab",
  header = TRUE,
  sep = "\t"
)

columns <- names(guiana)
columns.info <- c(
  "id", "best_identity.order_filtered_embl_r136_noenv_INS", "taxid",
  "phylum_name", "order_name", "class_name", "family_name", "genus_name",
  "species_name", "sequence"
)
columns.counts <- grep("^sample\\.", columns, value = TRUE)

# Strip only the leading "sample." prefix. The pattern is anchored and the dot
# escaped so that nothing else in a sample name can be removed by accident.
samples.names <- sub("^sample\\.", "", columns.counts)

guiana.count <- t(guiana[, columns.counts, drop = FALSE])

motus <- as.data.frame(guiana[, columns.info])
guiana.motus <- data.frame(
  # MOTUs get new sequential identifiers: EUK000001, EUK000002, ...
  id = sprintf("EUK%06d", seq_len(nrow(motus))),
  best_id = motus$best_identity.order_filtered_embl_r136_noenv_INS,
  taxid = as.integer(motus$taxid),
  species = factor(as.character(motus$species_name)),
  genus = factor(as.character(motus$genus_name)),
  family = factor(as.character(motus$family_name)),
  class = factor(as.character(motus$class_name)),
  order = factor(as.character(motus$order_name)),
  phylum = factor(as.character(motus$phylum_name)),
  sequence = as.character(motus$sequence),
  stringsAsFactors = FALSE
)

# Sample descriptions are looked up by row name.
# NOTE(review): this relies on read.delim() assigning the sample names as row
# names of the sample list - confirm against the file layout.
samples <- read.delim("RawData/Litiere_sample_list.txt", header = TRUE)
guiana.samples <- samples[samples.names, ]

# The sample identifier is the sample name without its trailing "_r<char>".
guiana.samples$sample <- as.factor(sub("_r.$", "", samples.names))

rownames(guiana.count) <- samples.names
colnames(guiana.count) <- guiana.motus$id
rownames(guiana.motus) <- guiana.motus$id

usethis::use_data(guiana.samples, overwrite = TRUE)
usethis::use_data(guiana.motus, overwrite = TRUE)
usethis::use_data(guiana.count, overwrite = TRUE)