# Source listing: R script, 145 lines, 5.5 KiB
# Positive-control datasets ----
#
# Two tab-separated tables are processed the same way: the annotated table and
# its ".clean" counterpart. Each one is split into a count matrix, a MOTU
# description data frame and a sample description data frame, and every
# resulting object is stored with usethis::use_data().

# Split one annotated table into its count / motus / samples components.
#
# raw: data frame holding the columns "id", "dilution", "species_name",
#      "taxid", "true" and "sequence", plus one count column per sample whose
#      name starts with "sample.".
#
# Returns a list with three elements:
#   count   - matrix of the sample columns, transposed (samples in rows,
#             `raw$id` as column names);
#   motus   - data frame (dilution, species, taxid, true), one row per row of
#             `raw`, row names taken from `raw$id`;
#   samples - data frame (dilution, repeats), one row per sample column.
split_positive_table <- function(raw) {
  info_columns <- c("id", "dilution", "species_name", "taxid", "true",
                    "sequence")
  count_columns <- grep("^sample\\.", names(raw), value = TRUE)

  # drop = FALSE keeps a matrix even when there is a single sample column.
  count <- t(raw[, count_columns, drop = FALSE])

  # Selecting every expected column fails early if one of them is missing.
  info <- raw[, info_columns]
  motus <- data.frame(
    dilution = as.numeric(info$dilution) / 2,
    species = as.character(info$species_name),
    taxid = as.integer(info$taxid),
    # read.delim() converts a column holding only True/False into a logical
    # vector, and TRUE == "True" is FALSE. Comparing the upper-cased text
    # accepts both the logical and the character representation.
    true = toupper(as.character(info$true)) == "TRUE"
  )

  sample_names <- rownames(count)

  # Sample names are "_"-separated; one row per sample after the transpose:
  #      [,1]        [,2]  [,3]  [,4] [,5] [,6]
  # [1,] "sample.TM" "POS" "d16" "1"  "a"  "A1"
  # [2,] "sample.TM" "POS" "d16" "1"  "a"  "B1"
  # [3,] "sample.TM" "POS" "d16" "1"  "b"  "A2"
  # [4,] "sample.TM" "POS" "d16" "1"  "b"  "B2"
  # [5,] "sample.TM" "POS" "d16" "2"  "a"  "A1"
  # [6,] "sample.TM" "POS" "d16" "2"  "a"  "B1"
  fields <- t(simplify2array(strsplit(sample_names, split = "_")))
  fields <- as.data.frame(fields[, 3:6, drop = FALSE])
  names(fields) <- c("dilution", "repeats", "PCR", "Plate")

  samples <- data.frame(
    # The dilution field looks like "d16": drop the leading letter and take
    # the integer quotient of 32 by the remaining number.
    dilution = 32 %/% as.integer(substr(as.character(fields$dilution), 2, 10)),
    # One factor level per observed (repeats, PCR, Plate) combination.
    repeats = interaction(fields[, 2:4], drop = TRUE)
  )

  rownames(samples) <- sample_names
  rownames(count) <- sample_names
  rownames(motus) <- raw$id
  colnames(count) <- raw$id

  list(count = count, motus = motus, samples = samples)
}

# Raw positive controls. stringsAsFactors = FALSE keeps text columns as
# character on every R version, so the numeric conversions done in
# split_positive_table() never return factor level codes.
positive <- read.delim("RawData/positifs.uniq.annotated.txt",
                       sep = "\t",
                       header = TRUE,
                       stringsAsFactors = FALSE)

positive.parts <- split_positive_table(positive)
positive.count <- positive.parts$count
positive.motus <- positive.parts$motus
positive.samples <- positive.parts$samples

# Table of the MOTUs flagged as true, with columns species, taxid, dilution.
# which() leaves out rows whose flag is NA instead of turning them into
# all-NA rows.
plants.16 <- positive.motus[which(positive.motus$true),
                            c("species", "taxid", "dilution")]

# Rows are sorted by increasing 1/dilution; the k-th row then gets the
# two-fold series value 1/2^k and its base-10 logarithm.
plants.16 <- plants.16[order(1 / plants.16$dilution), ]
plants.16$log10.dilution <- -seq_len(nrow(plants.16)) / log(10) * log(2)
plants.16$dilution <- 1 / (2^seq_len(nrow(plants.16)))

usethis::use_data(positive.samples, overwrite = TRUE)
usethis::use_data(positive.motus, overwrite = TRUE)
usethis::use_data(positive.count, overwrite = TRUE)
usethis::use_data(plants.16, overwrite = TRUE)

# ".clean" positive controls: same layout, same processing.
positive.clean <- read.delim("RawData/positifs.uniq.annotated.clean.txt",
                             sep = "\t",
                             header = TRUE,
                             stringsAsFactors = FALSE)

positive.clean.parts <- split_positive_table(positive.clean)
positive.clean.count <- positive.clean.parts$count
positive.clean.motus <- positive.clean.parts$motus
positive.clean.samples <- positive.clean.parts$samples

usethis::use_data(positive.clean.samples, overwrite = TRUE)
usethis::use_data(positive.clean.motus, overwrite = TRUE)
usethis::use_data(positive.clean.count, overwrite = TRUE)

#
# Litter/Soil dataset
#
# Builds three objects from the tab-separated table and the sample list, and
# stores each of them with usethis::use_data():
#   guiana.count   - matrix of the sample columns, transposed (samples in
#                    rows, generated MOTU ids as column names);
#   guiana.motus   - data frame describing each row of the table;
#   guiana.samples - rows of the sample list matching the count columns.

guiana <- read.delim("RawData/litiere_ins_cl97_agg_filt_tax.tab",
                     header = TRUE,
                     sep = "\t")

columns <- names(guiana)

columns.info <- c("id", "best_identity.order_filtered_embl_r136_noenv_INS",
                  "taxid",
                  "phylum_name", "order_name", "class_name", "family_name",
                  "genus_name", "species_name",
                  "sequence")

columns.counts <- grep("^sample\\.", columns, value = TRUE)

# Strip only the leading "sample." prefix. The pattern mirrors the one used to
# select the columns: an unanchored, unescaped "sample." would also delete
# "sample" followed by any character elsewhere in a name.
samples.names <- sub("^sample\\.", "", columns.counts)

# drop = FALSE keeps a matrix even when there is a single sample column.
guiana.count <- t(guiana[, columns.counts, drop = FALSE])

motus <- as.data.frame(guiana[, columns.info])
guiana.motus <- data.frame(
  # Sequential identifiers "EUK000001", "EUK000002", ...; seq_len() and a
  # single sprintf() also give an empty vector for an empty table.
  id = sprintf("EUK%06d", seq_len(nrow(motus))),
  best_id = motus$best_identity.order_filtered_embl_r136_noenv_INS,
  taxid = as.integer(motus$taxid),
  species = factor(as.character(motus$species_name)),
  genus = factor(as.character(motus$genus_name)),
  family = factor(as.character(motus$family_name)),
  class = factor(as.character(motus$class_name)),
  order = factor(as.character(motus$order_name)),
  phylum = factor(as.character(motus$phylum_name)),
  sequence = as.character(motus$sequence),
  stringsAsFactors = FALSE
)

samples <- read.delim("RawData/Litiere_sample_list.txt", header = TRUE)

# Rows are picked by row name, in the order of the count columns.
# NOTE(review): this relies on read.delim() having used the sample identifiers
# as row names (it does so when the header line has one field fewer than the
# data lines) - confirm against the file.
guiana.samples <- samples[samples.names, ]

# Sample label: the sample name without a trailing "_r" + one character.
guiana.samples$sample <- as.factor(sub("_r.$", "", samples.names))

rownames(guiana.count) <- samples.names
colnames(guiana.count) <- guiana.motus$id
rownames(guiana.motus) <- guiana.motus$id

usethis::use_data(guiana.samples, overwrite = TRUE)
usethis::use_data(guiana.motus, overwrite = TRUE)
usethis::use_data(guiana.count, overwrite = TRUE)