-
+
- The MetabarSchool Package
- What do the reading numbers per PCR mean? -
- Rarefaction vs. relative frequencies +
- Rarefaction vs. relative frequencies
- alpha diversity metrics
- beta diversity metrics
- multidimentionnal analysis
- comparison between datasets
diff --git a/NAMESPACE b/NAMESPACE
index 835f9f3..1d0f808 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,5 +7,6 @@ export(H_spectrum)
export(exp_q)
export(log_q)
export(mode)
+export(norm)
export(tag_bad_pcr)
importFrom(Rdpack,reprompt)
diff --git a/R/norme.R b/R/norme.R
new file mode 100644
index 0000000..5d45a23
--- /dev/null
+++ b/R/norme.R
@@ -0,0 +1,16 @@
+#' Pairwise Minkowski distances of order `l` between the rows of `data`
+#'
+#' @param data numeric matrix or data frame, one observation per row.
+#' @param l order of the norm; the default `l = 2` gives Euclidean distances.
+#' @return a `dist` object labelled with the row names of `data`.
+#' @export
+norm <- function(data,l=2) {
+  n <- nrow(data)
+  d <- matrix(0,nrow = n,ncol = n)
+  # seq_len() leaves both loops empty for 0 or 1 rows, where 1:n would not
+  for (i in seq_len(n))
+    for (j in seq_len(i - 1))
+      d[i,j] <- d[j,i] <- sum(abs(data[i,]-data[j,])^l)^(1/l)
+  rownames(d) <- colnames(d) <- rownames(data)
+  as.dist(d)
+}
diff --git a/figures/dist_hellinger.afdesign b/figures/dist_hellinger.afdesign
index fb6838d..ac8d351 100644
Binary files a/figures/dist_hellinger.afdesign and b/figures/dist_hellinger.afdesign differ
diff --git a/figures/diversity.afdesign b/figures/diversity.afdesign
index 807a904..5491315 100644
Binary files a/figures/diversity.afdesign and b/figures/diversity.afdesign differ
diff --git a/figures/subsampling.svg b/figures/subsampling.svg
index aaebcec..c6073fb 100644
--- a/figures/subsampling.svg
+++ b/figures/subsampling.svg
@@ -1,7 +1,7 @@
-
diff --git a/index.Rmd b/index.Rmd
index cf2940b..9f911cd 100644
--- a/index.Rmd
+++ b/index.Rmd
@@ -28,6 +28,7 @@ opts_chunk$set(echo = FALSE,
# Summary
+- The MetabarSchool Package
- What do the reading numbers per PCR mean?
- Rarefaction vs. relative frequencies
- alpha diversity metrics
@@ -35,6 +36,28 @@ opts_chunk$set(echo = FALSE,
- multidimentionnal analysis
- comparison between datasets
+# The MetabarSchool Package
+
+## Installing the package
+
+You need the *devtools* package
+
+```{r eval=FALSE, echo=TRUE}
+install.packages("devtools",dependencies = TRUE)
+```
+
+Then you can install *MetabarSchool*
+
+```{r eval=FALSE, echo=TRUE}
+devtools::install_git("https://git.metabarcoding.org/MetabarcodingSchool/biodiversity-metrics.git")
+```
+
+You will also need the *vegan* package
+
+```{r eval=FALSE, echo=TRUE}
+install.packages("vegan",dependencies = TRUE)
+```
+
# The dataset
## The mock community {.flexbox .vcenter .smaller}
@@ -68,6 +91,8 @@ data("positive.samples")
## Loading data
```{r echo=TRUE}
+library(MetabarSchool)
+
data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -94,6 +119,8 @@ positive.count[1:5,1:5]
## Loading data
```{r echo=TRUE}
+library(MetabarSchool)
+
data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -119,6 +146,8 @@ head(positive.samples,n=3)
## Loading data
```{r echo=TRUE}
+library(MetabarSchool)
+
data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -169,7 +198,7 @@ positive.motus = positive.motus[are.not.singleton,]
$`r nrow(positive.count)` \; PCRs \; \times \; `r ncol(positive.count)` \; MOTUs$
matrix
-## Not all the PCR have the number of reads {.flexbox .vcenter}
+## Not all the PCR have the same number of reads {.flexbox .vcenter}
Despite all standardization efforts
@@ -240,13 +269,19 @@ positive.count.rarefied = rrarefy(positive.count,2000)
## Rarefying read count (2) {.flexbox .vcenter}
-```{r fig.height=3}
+```{r fig.height=4}
par(mfrow=c(1,2),bg=NA)
hist(log10(colSums(positive.count)+1),
main = "Not rarefied",
+ xlim = c(0,6),
+ ylim = c(0,2300),
+ breaks = 30,
xlab = TeX("$\\log_{10}(reads per MOTUs)$"))
hist(log10(colSums(positive.count.rarefied)+1),
main = "Rarefied data",
+ xlim = c(0,6),
+ ylim = c(0,2300),
+ breaks = 30,
xlab = TeX("$\\log_{10}(reads per MOTUs)$"))
```
@@ -325,11 +360,11 @@ knitr::include_graphics("figures/diversity.svg")
@Whittaker:10:00
-- $\alpha-diversity$ : Mean diversity per site ($species/site$)
+- $\alpha\text{-diversity}$ : Mean diversity per site ($species/site$)
-- $\gamma-diversity$ : Regional biodiversity ($species/region$)
+- $\gamma\text{-diversity}$ : Regional biodiversity ($species/region$)
-- $\beta-diversity$ : $\beta = \frac{\gamma}{\alpha}$ ($site$)
+- $\beta\text{-diversity}$ : $\beta = \frac{\gamma}{\alpha}$ ($sites/region$)
@@ -410,25 +445,19 @@ kable(data.frame(`Gini-Simpson`=GS),
kable_styling(position = "center")
```
-## Shanon entropy {.smaller}
+## Shannon entropy {.smaller}
-
You need the devtools package
+ +install.packages("devtools",dependencies = TRUE)
+
+Then you can install MetabarSchool
+ +devtools::install_git("https://git.metabarcoding.org/MetabarcodingSchool/biodiversity-metrics.git")
+
+You will also need the vegan package
+ +install.packages("vegan",dependencies = TRUE)
+
192 PCR of the mock community using SPER02 trnL-P6-Loop primers
192 PCR of the mock community using SPER02 trnL-P6-Loop primers
+ +6 dilutions of the mock community: 1/1, 1/2, 1/4, 1/8, 1/16, 1/32
32 repeats per dilution
data("positive.count")
+library(MetabarSchool)
+
+data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -880,7 +905,9 @@ sample.TM_POS_d16_2_a_A1
data("positive.count")
+library(MetabarSchool)
+
+data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -992,7 +1019,9 @@ sample.TM_POS_d16_1_b_A2
data("positive.count")
+library(MetabarSchool)
+
+data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -1212,11 +1241,11 @@ positive.motus = positive.motus[are.not.singleton,]
positive.count is now a \(192 \; PCRs \; \times \; 5579 \; MOTUs\) matrixDespite all standardization efforts
-

Is it related to the amount of DNA in the extract ?




## are.still.present ## FALSE TRUE -## 1942 3637+## 1886 3693
par(bg=NA) boxplot(colSums(positive.count) ~ are.still.present, log="y")-


The MOTUs removed by rarefaction were at most occurring 21 times
+The MOTUs removed by rarefaction were at most occurring 13 times
The MOTUs kept by rarefaction were at least occurring 2 times
@@ -1306,7 +1335,7 @@ positive.motus.rare = positive.motus[are.still.present,]
Whittaker (2010)
\(\alpha-diversity\) : Mean diversity per site (\(species/site\))
\(\gamma-diversity\) : Regional biodiversity (\(species/region\))
\(\beta-diversity\) : \(\beta = \frac{\gamma}{\alpha}\) (\(site\))
\(\alpha\text{-diversity}\) : Mean diversity per site (\(species/site\))
\(\gamma\text{-diversity}\) : Regional biodiversity (\(species/region\))
\(\beta\text{-diversity}\) : \(\beta = \frac{\gamma}{\alpha}\) (\(sites/region\))
The Simpson's index is the probability of having the same species twice when you randomly select two specimens.
The Simpson’s index is the probability of having the same species twice when you randomly select two specimens.
\[ @@ -1597,7 +1626,7 @@ Environment.2
\(\lambda\) decrease when complexity of your ecosystem increase.
-Gini-Simpson's index defined as \(1-\lambda\) increase with diversity
+Gini-Simpson’s index defined as \(1-\lambda\) increase with diversity
Shanon entropy is based on information theory.
+Shannon entropy is based on information theory:
-Let \(X\) be a uniformly distributed random variable with values in \(A\)
+\[ -H(X) = \log|A| +\(H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}\) + +
if \(A\) is a community where every species are equally represented then \[ +H(A) = \log|A| \]
-\[
-H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}
-\]
As : \[
-H(X) = \log|A| \;\Rightarrow\; ^1D = e^{H(X)}
+H(A) = \log|A| \;\Rightarrow\; ^1D = e^{H(A)}
\]
where \(^1D\) is the theoretical number of species in a evenly distributed community that would have the same Shanon's entropy than ours.
where \(^1D\) is the theoretical number of species in a evenly distributed community that would have the same Shannon’s entropy than ours.
log_q function

\[ -^qH = - \sum_{i=1}^S pi \times ^q\log pi +^qH = - \sum_{i=1}^S p_i \; ^q\log p_i \]
H_q = function(x,q=1) {
sum(x * log_q(1/x,q),na.rm = TRUE)
}
-and generalized the previously presented Hill's number
+and generalized the previously presented Hill’s number
\[ ^qD=^qe^{^qH} @@ -1908,22 +1933,22 @@ qs = seq(from=0,to=3,by=0.1) environments.hq = apply(environments,MARGIN = 1,H_spectrum,q=qs) environments.dq = apply(environments,MARGIN = 1,D_spectrum,q=qs) -


\(^0H(X) = S - 1\) : the richness minus one.
\(^1H(X) = H^{\prime}\) : the Shanon's entropy.
\(^2H(X) = 1 - \lambda\) : Gini-Simpson's index.
\(^1H(X) = H^{\prime}\) : the Shannon’s entropy.
\(^2H(X) = 1 - \lambda\) : Gini-Simpson’s index.
\(^0D(X) = S\) : The richness.
\(^1D(X) = e^{H^{\prime}}\) : The number of species in an even community having the same \(H^{\prime}\).
\(^2D(X) = 1 / \lambda\) : The number of species in an even community having the same Gini-Simpson's index.
\(^2D(X) = 1 / \lambda\) : The number of species in an even community having the same Gini-Simpson’s index.
H.mock = H_spectrum(plants.16$dilution,qs) D.mock = D_spectrum(plants.16$dilution,qs)-












You can generalize those distances as a norm of order \(k\)
@@ -2140,7 +2165,7 @@ d(x,z)\leq \max(d(x,y),d(y,z))s = tag_bad_pcr(guiana.samples$sample,guiana.count)-


guiana.count.clean = guiana.count[s$keep,] guiana.samples.clean = guiana.samples[s$keep,]@@ -2182,7 +2207,7 @@ guiana.samples.clean = guiana.samples[s$keep,]
s = tag_bad_pcr(guiana.samples.clean$sample,guiana.count.clean)-


guiana.count.clean = guiana.count.clean[s$keep,] guiana.samples.clean = guiana.samples.clean[s$keep,]@@ -2197,7 +2222,7 @@ guiana.samples.clean = guiana.samples.clean[s$keep,]
s = tag_bad_pcr(guiana.samples.clean$sample,guiana.count.clean)-


guiana.count.clean = guiana.count.clean[s$keep,] guiana.samples.clean = guiana.samples.clean[s$keep,]@@ -2264,7 +2289,7 @@ xy = xy[,1:2] xy.hellinger = decostand(xy,method = "hellinger")


\[ -BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}} +BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}} \]
\[ -BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{1+1} +BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{1+1} \]
\[ -BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}]N_{ij} - N_{ik}| +BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}|N_{ij} - N_{ik}| \]


guiana.hellinger.pca = prcomp(guiana.hellinger.final,center = TRUE, scale. = FALSE)-



