Some corrections on the lecture
This commit is contained in:
139
index.Rmd
139
index.Rmd
@@ -28,6 +28,7 @@ opts_chunk$set(echo = FALSE,
|
||||
|
||||
# Summary
|
||||
|
||||
- The MetabarSchool Package
|
||||
- What do the reading numbers per PCR mean?
|
||||
- Rarefaction vs. relative frequencies
|
||||
- alpha diversity metrics
|
||||
@@ -35,6 +36,28 @@ opts_chunk$set(echo = FALSE,
|
||||
- multidimentionnal analysis
|
||||
- comparison between datasets
|
||||
|
||||
# The MetabarSchool Package
|
||||
|
||||
## Instaling the package
|
||||
|
||||
You need the *devtools* package
|
||||
|
||||
```{r eval=FALSE, echo=TRUE}
|
||||
install.packages("devtools",dependencies = TRUE)
|
||||
```
|
||||
|
||||
Then you can install *MetabarSchool*
|
||||
|
||||
```{r eval=FALSE, echo=TRUE}
|
||||
devtools::install_git("https://git.metabarcoding.org/MetabarcodingSchool/biodiversity-metrics.git")
|
||||
```
|
||||
|
||||
You will also need the *vegan* package
|
||||
|
||||
```{r eval=FALSE, echo=TRUE}
|
||||
install.packages("vegan",dependencies = TRUE)
|
||||
```
|
||||
|
||||
# The dataset
|
||||
|
||||
## The mock community {.flexbox .vcenter .smaller}
|
||||
@@ -68,6 +91,8 @@ data("positive.samples")
|
||||
## Loading data
|
||||
|
||||
```{r echo=TRUE}
|
||||
library(MetabarSchool)
|
||||
|
||||
data("positive.count")
|
||||
data("positive.samples")
|
||||
data("positive.motus")
|
||||
@@ -94,6 +119,8 @@ positive.count[1:5,1:5]
|
||||
## Loading data
|
||||
|
||||
```{r echo=TRUE}
|
||||
library(MetabarSchool)
|
||||
|
||||
data("positive.count")
|
||||
data("positive.samples")
|
||||
data("positive.motus")
|
||||
@@ -119,6 +146,8 @@ head(positive.samples,n=3)
|
||||
## Loading data
|
||||
|
||||
```{r echo=TRUE}
|
||||
library(MetabarSchool)
|
||||
|
||||
data("positive.count")
|
||||
data("positive.samples")
|
||||
data("positive.motus")
|
||||
@@ -169,7 +198,7 @@ positive.motus = positive.motus[are.not.singleton,]
|
||||
$`r nrow(positive.count)` \; PCRs \; \times \; `r ncol(positive.count)` \; MOTUs$
|
||||
matrix
|
||||
|
||||
## Not all the PCR have the number of reads {.flexbox .vcenter}
|
||||
## Not all the PCR have the same number of reads {.flexbox .vcenter}
|
||||
|
||||
Despite all standardization efforts
|
||||
|
||||
@@ -240,13 +269,19 @@ positive.count.rarefied = rrarefy(positive.count,2000)
|
||||
|
||||
## Rarefying read count (2) {.flexbox .vcenter}
|
||||
|
||||
```{r fig.height=3}
|
||||
```{r fig.height=4}
|
||||
par(mfrow=c(1,2),bg=NA)
|
||||
hist(log10(colSums(positive.count)+1),
|
||||
main = "Not rarefied",
|
||||
xlim = c(0,6),
|
||||
ylim = c(0,2300),
|
||||
breaks = 30,
|
||||
xlab = TeX("$\\log_{10}(reads per MOTUs)$"))
|
||||
hist(log10(colSums(positive.count.rarefied)+1),
|
||||
main = "Rarefied data",
|
||||
xlim = c(0,6),
|
||||
ylim = c(0,2300),
|
||||
breaks = 30,
|
||||
xlab = TeX("$\\log_{10}(reads per MOTUs)$"))
|
||||
```
|
||||
|
||||
@@ -325,11 +360,11 @@ knitr::include_graphics("figures/diversity.svg")
|
||||
@Whittaker:10:00
|
||||
<br><br><br><br>
|
||||
|
||||
- $\alpha-diversity$ : Mean diversity per site ($species/site$)
|
||||
- $\alpha\text{-diversity}$ : Mean diversity per site ($species/site$)
|
||||
|
||||
- $\gamma-diversity$ : Regional biodiversity ($species/region$)
|
||||
- $\gamma\text{-diversity}$ : Regional biodiversity ($species/region$)
|
||||
|
||||
- $\beta-diversity$ : $\beta = \frac{\gamma}{\alpha}$ ($site$)
|
||||
- $\beta\text{-diversity}$ : $\beta = \frac{\gamma}{\alpha}$ ($sites/region$)
|
||||
|
||||
</div>
|
||||
|
||||
@@ -410,25 +445,19 @@ kable(data.frame(`Gini-Simpson`=GS),
|
||||
kable_styling(position = "center")
|
||||
```
|
||||
|
||||
## Shanon entropy {.smaller}
|
||||
## Shannon entropy {.smaller}
|
||||
|
||||
<div style="float: left; width: 65%;">
|
||||
Shanon entropy is based on information theory.
|
||||
Shannon entropy is based on information theory:
|
||||
|
||||
Let $X$ be a uniformly distributed random variable with values in $A$
|
||||
<center>
|
||||
$H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}$
|
||||
</center>
|
||||
|
||||
$$
|
||||
H(X) = \log|A|
|
||||
$$
|
||||
|
||||
<br>
|
||||
</div>
|
||||
<div style="float: right; width: 35%;">
|
||||
if $A$ is a community where every species are equally represented then
|
||||
$$
|
||||
H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}
|
||||
H(A) = \log|A|
|
||||
$$
|
||||
<br>
|
||||
</div>
|
||||
|
||||
<center>
|
||||
```{r out.width = "400px"}
|
||||
@@ -441,7 +470,7 @@ H = - rowSums(environments * log(environments),na.rm = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
kable(data.frame(`Shanon index`=H),
|
||||
kable(data.frame(`Shannon index`=H),
|
||||
format="html",
|
||||
align = 'rr') %>%
|
||||
kable_styling(position = "center")
|
||||
@@ -452,12 +481,12 @@ kable(data.frame(`Shanon index`=H),
|
||||
<div style="float: left; width: 50%;">
|
||||
As :
|
||||
$$
|
||||
H(X) = \log|A| \;\Rightarrow\; ^1D = e^{H(X)}
|
||||
H(A) = \log|A| \;\Rightarrow\; ^1D = e^{H(A)}
|
||||
$$
|
||||
<br>
|
||||
</div>
|
||||
<div style="float: right; width: 50%;">
|
||||
where $^1D$ is the theoretical number of species in a evenly distributed community that would have the same Shanon's entropy than ours.
|
||||
where $^1D$ is the theoretical number of species in a evenly distributed community that would have the same Shannon's entropy than ours.
|
||||
</div>
|
||||
|
||||
<center>
|
||||
@@ -548,10 +577,10 @@ exp_q = function(x,q=1) {
|
||||
}
|
||||
```
|
||||
|
||||
## Generalised Shanon entropy
|
||||
## Generalised Shannon entropy
|
||||
|
||||
$$
|
||||
^qH = - \sum_{i=1}^S pi \times ^q\log pi
|
||||
^qH = - \sum_{i=1}^S p_i \; ^q\log p_i
|
||||
$$
|
||||
|
||||
```{r echo=TRUE, eval=FALSE}
|
||||
@@ -616,7 +645,7 @@ abline(v=c(0,1,2),lty=2,col=4:6)
|
||||
|
||||
- $^0H(X) = S - 1$ : the richness minus one.
|
||||
|
||||
- $^1H(X) = H^{\prime}$ : the Shanon's entropy.
|
||||
- $^1H(X) = H^{\prime}$ : the Shannon's entropy.
|
||||
|
||||
- $^2H(X) = 1 - \lambda$ : Gini-Simpson's index.
|
||||
|
||||
@@ -1074,15 +1103,15 @@ BC_{jk}=\frac{\sum _{i=1}^{p}(N_{ij} - min(N_{ij},N_{ik}) + (N_{ik} - min(N_{ij}
|
||||
$$
|
||||
|
||||
$$
|
||||
BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}}
|
||||
BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}}
|
||||
$$
|
||||
|
||||
$$
|
||||
BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{1+1}
|
||||
BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{1+1}
|
||||
$$
|
||||
|
||||
$$
|
||||
BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}]N_{ij} - N_{ik}|
|
||||
BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}|N_{ij} - N_{ik}|
|
||||
$$
|
||||
|
||||
## Principale coordinate analysis (1) {.flexbox .vcenter}
|
||||
@@ -1109,7 +1138,7 @@ plot(guiana.bc.pcoa$points[,1:2],
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Bray Curtis on Rel. Freqs")
|
||||
plot(guiana.euc.pcoa$points[,1:2],
|
||||
plot(guiana.euc.pcoa$points[,1],-guiana.euc.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
@@ -1123,7 +1152,7 @@ plot(guiana.jac.1.pcoa$points[,1:2],
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Jaccard on presence (0.1%)")
|
||||
plot(guiana.jac.10.pcoa$points[,1:2],
|
||||
plot(-guiana.jac.10.pcoa$points[,1],guiana.jac.10.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
@@ -1162,6 +1191,58 @@ plot(0,type='n',axes=FALSE,ann=FALSE)
|
||||
legend("topleft",legend = levels(samples.type),fill = 1:4,cex=1.2)
|
||||
```
|
||||
|
||||
<!---
|
||||
## Computation of norms
|
||||
|
||||
```{r guiana_norm, echo=TRUE}
|
||||
guiana.n1.dist = norm(guiana.relfreq.final,l=1)
|
||||
guiana.n2.dist = norm(guiana.relfreq.final^(1/2),l=2)
|
||||
guiana.n3.dist = norm(guiana.relfreq.final^(1/3),l=3)
|
||||
guiana.n4.dist = norm(guiana.relfreq.final^(1/100),l=100)
|
||||
```
|
||||
|
||||
## pCoA on norms
|
||||
|
||||
```{r dependson="guiana_norm"}
|
||||
guiana.n1.pcoa = cmdscale(guiana.n1.dist,k=3,eig = TRUE)
|
||||
guiana.n2.pcoa = cmdscale(guiana.n2.dist,k=3,eig = TRUE)
|
||||
guiana.n3.pcoa = cmdscale(guiana.n3.dist,k=3,eig = TRUE)
|
||||
guiana.n4.pcoa = cmdscale(guiana.n4.dist,k=3,eig = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(mfrow=c(2,3),bg=NA)
|
||||
plot(guiana.n1.pcoa$points[,1],guiana.n1.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Norm 1 on Hellinger")
|
||||
plot(guiana.n2.pcoa$points[,1],-guiana.n2.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Norm 2 on Hellinger")
|
||||
plot(0,type='n',axes=FALSE,ann=FALSE)
|
||||
legend("topleft",legend = levels(samples.type),fill = 1:4,cex=1.2)
|
||||
plot(-guiana.n3.pcoa$points[,1],-guiana.n3.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Norm 3 on Hellinger")
|
||||
plot(-guiana.n4.pcoa$points[,1],-guiana.n4.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Norm 4 on Hellinger")
|
||||
|
||||
```
|
||||
|
||||
--->
|
||||
|
||||
## Comparing diversity of the environments
|
||||
|
||||
```{r}
|
||||
|
||||
Reference in New Issue
Block a user