Compare commits
4 Commits
Tromso-201
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| f6431654dc | |||
| 050956c01b | |||
| cf80ff9f6a | |||
| 657aaa734d |
2125
Lecture.html
Normal file
2125
Lecture.html
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,16 +1,16 @@
|
||||
---
|
||||
title: "Biodiversity metrics \ and metabarcoding"
|
||||
author: "Eric Coissac"
|
||||
date: "28/01/2019"
|
||||
date: "02/02/2024"
|
||||
bibliography: inst/REFERENCES.bib
|
||||
output:
|
||||
ioslides_presentation:
|
||||
widescreen: true
|
||||
format:
|
||||
revealjs:
|
||||
smaller: true
|
||||
css: slides.css
|
||||
mathjax: local
|
||||
self_contained: false
|
||||
slidy_presentation: default
|
||||
transition: slide
|
||||
scrollable: true
|
||||
theme: simple
|
||||
html-math-method: mathjax
|
||||
editor: visual
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
@@ -18,15 +18,16 @@ library(knitr)
|
||||
library(tidyverse)
|
||||
library(kableExtra)
|
||||
library(latex2exp)
|
||||
library(MetabarSchool)
|
||||
|
||||
opts_chunk$set(echo = FALSE,
|
||||
cache = TRUE,
|
||||
cache.lazy = FALSE)
|
||||
```
|
||||
|
||||
|
||||
# Summary
|
||||
|
||||
- The MetabarSchool Package
|
||||
- What do the reading numbers per PCR mean?
|
||||
- Rarefaction vs. relative frequencies
|
||||
- alpha diversity metrics
|
||||
@@ -34,6 +35,28 @@ opts_chunk$set(echo = FALSE,
|
||||
- multidimentionnal analysis
|
||||
- comparison between datasets
|
||||
|
||||
# The MetabarSchool Package
|
||||
|
||||
## Installing the package
|
||||
|
||||
You need the *devtools* package
|
||||
|
||||
```{r eval=FALSE, echo=TRUE}
|
||||
install.packages("devtools",dependencies = TRUE)
|
||||
```
|
||||
|
||||
Then you can install *MetabarSchool*
|
||||
|
||||
```{r eval=FALSE, echo=TRUE}
|
||||
devtools::install_git("https://git.metabarcoding.org/MetabarcodingSchool/biodiversity-metrics.git")
|
||||
```
|
||||
|
||||
You will also need the *vegan* package
|
||||
|
||||
```{r eval=FALSE, echo=TRUE}
|
||||
install.packages("vegan",dependencies = TRUE)
|
||||
```
|
||||
|
||||
# The dataset
|
||||
|
||||
## The mock community {.flexbox .vcenter .smaller}
|
||||
@@ -59,22 +82,21 @@ data("positive.samples")
|
||||
|
||||
- `r nrow(positive.samples)` PCR of the mock community using SPER02 trnL-P6-Loop primers
|
||||
|
||||
- `r length(table(positive.samples$dilution))` dilutions of the mock
|
||||
community: `r paste0('1/',names(table(positive.samples$dilution)))`
|
||||
- `r length(table(positive.samples$dilution))` dilutions of the mock community: `r paste0('1/',names(table(positive.samples$dilution)))`
|
||||
|
||||
- `r as.numeric(table(positive.samples$dilution)[1])` repeats per dilution
|
||||
|
||||
## Loading data
|
||||
|
||||
```{r echo=TRUE}
|
||||
library(MetabarSchool)
|
||||
|
||||
data("positive.count")
|
||||
data("positive.samples")
|
||||
data("positive.motus")
|
||||
```
|
||||
|
||||
- `positive.count` read count matrix
|
||||
$`r nrow(positive.count)` \; PCRs \; \times \; `r ncol(positive.count)` \; MOTUs$
|
||||
|
||||
- `positive.count` read count matrix $`r nrow(positive.count)` \; PCRs \; \times \; `r ncol(positive.count)` \; MOTUs$
|
||||
|
||||
```{r}
|
||||
knitr::kable(positive.count[1:5,1:5],
|
||||
@@ -85,22 +107,22 @@ knitr::kable(positive.count[1:5,1:5],
|
||||
```
|
||||
|
||||
<br>
|
||||
|
||||
```{r echo=TRUE,eval=FALSE}
|
||||
positive.count[1:5,1:5]
|
||||
```
|
||||
|
||||
|
||||
## Loading data
|
||||
|
||||
```{r echo=TRUE}
|
||||
library(MetabarSchool)
|
||||
|
||||
data("positive.count")
|
||||
data("positive.samples")
|
||||
data("positive.motus")
|
||||
```
|
||||
|
||||
- `positive.samples` a `r nrow(positive.samples)` rows `data.frame` of
|
||||
`r ncol(positive.samples)` columns describing each PCR
|
||||
|
||||
- `positive.samples` a `r nrow(positive.samples)` rows `data.frame` of `r ncol(positive.samples)` columns describing each PCR
|
||||
|
||||
```{r}
|
||||
knitr::kable(head(positive.samples,n=3),
|
||||
@@ -110,21 +132,22 @@ knitr::kable(head(positive.samples,n=3),
|
||||
```
|
||||
|
||||
<br>
|
||||
|
||||
```{r echo=TRUE,eval=FALSE}
|
||||
head(positive.samples,n=3)
|
||||
```
|
||||
|
||||
|
||||
## Loading data
|
||||
|
||||
```{r echo=TRUE}
|
||||
library(MetabarSchool)
|
||||
|
||||
data("positive.count")
|
||||
data("positive.samples")
|
||||
data("positive.motus")
|
||||
```
|
||||
|
||||
- `positive.motus` : a `r nrow(positive.motus)` rows `data.frame` of
|
||||
`r ncol(positive.motus)` columns describing each MOTU
|
||||
- `positive.motus` : a `r nrow(positive.motus)` rows `data.frame` of `r ncol(positive.motus)` columns describing each MOTU
|
||||
|
||||
```{r}
|
||||
knitr::kable(head(positive.motus,n=3),
|
||||
@@ -134,6 +157,7 @@ knitr::kable(head(positive.motus,n=3),
|
||||
```
|
||||
|
||||
<br>
|
||||
|
||||
```{r echo=TRUE,eval=FALSE}
|
||||
head(positive.motus,n=3)
|
||||
```
|
||||
@@ -146,7 +170,6 @@ Singleton sequences are observed only once over the complete dataset.
|
||||
table(colSums(positive.count) == 1)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
kable(t(table(colSums(positive.count) == 1)),
|
||||
format = "html") %>%
|
||||
@@ -164,11 +187,9 @@ positive.count = positive.count[,are.not.singleton]
|
||||
positive.motus = positive.motus[are.not.singleton,]
|
||||
```
|
||||
|
||||
- `positive.count` is now a
|
||||
$`r nrow(positive.count)` \; PCRs \; \times \; `r ncol(positive.count)` \; MOTUs$
|
||||
matrix
|
||||
- `positive.count` is now a $`r nrow(positive.count)` \; PCRs \; \times \; `r ncol(positive.count)` \; MOTUs$ matrix
|
||||
|
||||
## Not all the PCR have the number of reads {.flexbox .vcenter}
|
||||
## Not all the PCR have the same number of reads {.flexbox .vcenter}
|
||||
|
||||
Despite all standardization efforts
|
||||
|
||||
@@ -180,9 +201,9 @@ hist(rowSums(positive.count),
|
||||
main = "Number of read per PCR")
|
||||
```
|
||||
|
||||
<div class="green">
|
||||
::: green
|
||||
Is it related to the amount of DNA in the extract ?
|
||||
</div>
|
||||
:::
|
||||
|
||||
## What do the reading numbers per PCR mean? {.smaller}
|
||||
|
||||
@@ -192,17 +213,13 @@ boxplot(rowSums(positive.count) ~ positive.samples$dilution,log="y")
|
||||
abline(h = median(rowSums(positive.count)),lw=2,col="red",lty=2)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
SC = summary(aov((rowSums(positive.count)) ~ positive.samples$dilution))[[1]]$`Sum Sq`
|
||||
```
|
||||
|
||||
<div class="red2">
|
||||
<center>
|
||||
Only `r round((SC/sum(SC)*100)[1],1)`% of the PCR read count
|
||||
variation is explain by dilution
|
||||
</center>
|
||||
</div>
|
||||
::: red2
|
||||
<center>Only `r round((SC/sum(SC)*100)[1],1)`% of the PCR read count variation is explain by dilution</center>
|
||||
:::
|
||||
|
||||
## You must normalize your read counts
|
||||
|
||||
@@ -212,7 +229,6 @@ Two options:
|
||||
|
||||
Randomly subsample the same number of reads for all the PCRs
|
||||
|
||||
|
||||
### Relative frequencies
|
||||
|
||||
Divide the read count of each MOTU in each sample by the total total read count of the same sample
|
||||
@@ -239,13 +255,19 @@ positive.count.rarefied = rrarefy(positive.count,2000)
|
||||
|
||||
## Rarefying read count (2) {.flexbox .vcenter}
|
||||
|
||||
```{r fig.height=3}
|
||||
```{r fig.height=4}
|
||||
par(mfrow=c(1,2),bg=NA)
|
||||
hist(log10(colSums(positive.count)+1),
|
||||
main = "Not rarefied",
|
||||
xlim = c(0,6),
|
||||
ylim = c(0,2300),
|
||||
breaks = 30,
|
||||
xlab = TeX("$\\log_{10}(reads per MOTUs)$"))
|
||||
hist(log10(colSums(positive.count.rarefied)+1),
|
||||
main = "Rarefied data",
|
||||
xlim = c(0,6),
|
||||
ylim = c(0,2300),
|
||||
breaks = 30,
|
||||
xlab = TeX("$\\log_{10}(reads per MOTUs)$"))
|
||||
```
|
||||
|
||||
@@ -282,9 +304,7 @@ positive.count.rarefied = positive.count.rarefied[,are.still.present]
|
||||
positive.motus.rare = positive.motus[are.still.present,]
|
||||
```
|
||||
|
||||
<center>
|
||||
positive.motus.rare is now a $`r nrow(positive.count.rarefied)` \; PCRs \; \times \; `r ncol(positive.count.rarefied)` \; MOTUs$
|
||||
</center>
|
||||
<center>positive.motus.rare is now a $`r nrow(positive.count.rarefied)` \; PCRs \; \times \; `r ncol(positive.count.rarefied)` \; MOTUs$</center>
|
||||
|
||||
## Why rarefying ? {.vcenter .columns-2}
|
||||
|
||||
@@ -292,8 +312,7 @@ positive.motus.rare is now a $`r nrow(positive.count.rarefied)` \; PCRs \; \time
|
||||
knitr::include_graphics("figures/subsampling.svg")
|
||||
```
|
||||
|
||||
<br><br><br><br>
|
||||
Increasing the number of reads just increase the description of the subpart of the PCR you have sequenced.
|
||||
<br><br><br><br> Increasing the number of reads just increase the description of the subpart of the PCR you have sequenced.
|
||||
|
||||
## Transforming read counts to relative frequencies
|
||||
|
||||
@@ -312,26 +331,21 @@ table(colSums(positive.count.relfreq) == 0)
|
||||
|
||||
## The different types of diversity {.vcenter}
|
||||
|
||||
<div style="float: left; width: 40%;">
|
||||
::: {style="float: left; width: 40%;"}
|
||||
```{r}
|
||||
knitr::include_graphics("figures/diversity.svg")
|
||||
```
|
||||
</div>
|
||||
:::
|
||||
|
||||
<div style="float: left; width: 60%;">
|
||||
::: {style="float: left; width: 60%;"}
|
||||
<br><br> @Whittaker:10:00 <br><br><br><br>
|
||||
|
||||
<br><br>
|
||||
@Whittaker:10:00
|
||||
<br><br><br><br>
|
||||
- $\alpha\text{-diversity}$ : Mean diversity per site ($species/site$)
|
||||
|
||||
- $\alpha-diversity$ : Mean diversity per site ($species/site$)
|
||||
|
||||
- $\gamma-diversity$ : Regional biodiversity ($species/region$)
|
||||
|
||||
- $\beta-diversity$ : $\beta = \frac{\gamma}{\alpha}$ ($site$)
|
||||
|
||||
</div>
|
||||
- $\gamma\text{-diversity}$ : Regional biodiversity ($species/region$)
|
||||
|
||||
- $\beta\text{-diversity}$ : $\beta = \frac{\gamma}{\alpha}$ ($sites/region$)
|
||||
:::
|
||||
|
||||
# $\alpha$-diversity
|
||||
|
||||
@@ -341,7 +355,6 @@ knitr::include_graphics("figures/diversity.svg")
|
||||
knitr::include_graphics("figures/alpha_diversity.svg")
|
||||
```
|
||||
|
||||
|
||||
```{r out.width = "400px"}
|
||||
E1 = c(A=0.25,B=0.25,C=0.25,D=0.25,E=0,F=0,G=0)
|
||||
E2 = c(A=0.55,B=0.07,C=0.02,D=0.17,E=0.07,F=0.07,G=0.03)
|
||||
@@ -352,7 +365,6 @@ kable(environments,
|
||||
kable_styling(position = "center")
|
||||
```
|
||||
|
||||
|
||||
## Richness {.flexbox .vcenter}
|
||||
|
||||
The actual number of species present in your environement whatever their aboundances
|
||||
@@ -374,17 +386,15 @@ kable(data.frame(S=S),
|
||||
|
||||
## Gini-Simpson's index {.smaller}
|
||||
|
||||
<div style="float: left; width: 60%;">
|
||||
The Simpson's index is the probability of having the same species twice when you randomly select two specimens.
|
||||
<br>
|
||||
<br>
|
||||
</div>
|
||||
<div style="float: right; width: 40%;">
|
||||
::: {style="float: left; width: 60%;"}
|
||||
The Simpson's index is the probability of having the same species twice when you randomly select two specimens. <br> <br>
|
||||
:::
|
||||
|
||||
::: {style="float: right; width: 40%;"}
|
||||
$$
|
||||
\lambda =\sum _{i=1}^{S}p_{i}^{2}
|
||||
$$
|
||||
<br>
|
||||
</div>
|
||||
$$ <br>
|
||||
:::
|
||||
|
||||
<center>
|
||||
|
||||
@@ -409,30 +419,22 @@ kable(data.frame(`Gini-Simpson`=GS),
|
||||
kable_styling(position = "center")
|
||||
```
|
||||
|
||||
## Shanon entropy {.smaller}
|
||||
## Shannon entropy {.smaller}
|
||||
|
||||
<div style="float: left; width: 65%;">
|
||||
Shanon entropy is based on information theory.
|
||||
Shannon entropy is based on information theory:
|
||||
|
||||
Let $X$ be a uniformly distributed random variable with values in $A$
|
||||
<center>$H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}$</center>
|
||||
|
||||
if $A$ is a community where every species are equally represented then $$
|
||||
H(A) = \log|A|
|
||||
$$
|
||||
H(X) = \log|A|
|
||||
$$
|
||||
|
||||
<br>
|
||||
</div>
|
||||
<div style="float: right; width: 35%;">
|
||||
$$
|
||||
H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}
|
||||
$$
|
||||
<br>
|
||||
</div>
|
||||
|
||||
<center>
|
||||
|
||||
```{r out.width = "400px"}
|
||||
knitr::include_graphics("figures/alpha_diversity.svg")
|
||||
```
|
||||
|
||||
</center>
|
||||
|
||||
```{r echo=TRUE}
|
||||
@@ -440,7 +442,7 @@ H = - rowSums(environments * log(environments),na.rm = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
kable(data.frame(`Shanon index`=H),
|
||||
kable(data.frame(`Shannon index`=H),
|
||||
format="html",
|
||||
align = 'rr') %>%
|
||||
kable_styling(position = "center")
|
||||
@@ -448,23 +450,24 @@ kable(data.frame(`Shanon index`=H),
|
||||
|
||||
## Hill's number {.smaller}
|
||||
|
||||
<div style="float: left; width: 50%;">
|
||||
As :
|
||||
$$
|
||||
H(X) = \log|A| \;\Rightarrow\; ^2D = e^{H(X)}
|
||||
$$
|
||||
<br>
|
||||
</div>
|
||||
<div style="float: right; width: 50%;">
|
||||
where $^2D$ is the theoretical number of species in a evenly distributed community that would have the same Shanon's entropy than ours.
|
||||
</div>
|
||||
::: {style="float: left; width: 50%;"}
|
||||
As : $$
|
||||
H(A) = \log|A| \;\Rightarrow\; ^1D = e^{H(A)}
|
||||
$$ <br>
|
||||
:::
|
||||
|
||||
::: {style="float: right; width: 50%;"}
|
||||
where $^1D$ is the theoretical number of species in a evenly distributed community that would have the same Shannon's entropy than ours.
|
||||
:::
|
||||
|
||||
<center>
|
||||
<BR>
|
||||
<BR>
|
||||
|
||||
<BR> <BR>
|
||||
|
||||
```{r out.width = "400px"}
|
||||
knitr::include_graphics("figures/alpha_diversity.svg")
|
||||
```
|
||||
|
||||
</center>
|
||||
|
||||
```{r echo=TRUE}
|
||||
@@ -483,7 +486,7 @@ kable(data.frame(`Hill Numbers`=D2),
|
||||
Based on the generalized entropy @Tsallis:94:00 we can propose a generalized form of logarithm.
|
||||
|
||||
$$
|
||||
^q\log(x) = \frac{x^{(1-q)}}{1-q}
|
||||
^q\log(x) = \frac{x^{(1-q)}-1}{1-q}
|
||||
$$
|
||||
|
||||
The function is not defined for $q=1$ but when $q \longrightarrow 1\;,\; ^q\log(x) \longrightarrow \log(x)$
|
||||
@@ -491,14 +494,14 @@ The function is not defined for $q=1$ but when $q \longrightarrow 1\;,\; ^q\log(
|
||||
$$
|
||||
^q\log(x) = \left\{
|
||||
\begin{align}
|
||||
\log(x),& \text{if } x = 1\\
|
||||
\frac{x^{(1-q)}}{1-q},& \text{otherwise}
|
||||
\log(x),& \text{if } q = 1\\
|
||||
\frac{x^{(1-q)}-1}{1-q},& \text{otherwise}
|
||||
\end{align}
|
||||
\right.
|
||||
$$
|
||||
|
||||
```{r echo=TRUE, eval=FALSE}
|
||||
log.q = function(x,q=1) {
|
||||
log_q = function(x,q=1) {
|
||||
if (q==1)
|
||||
log(x)
|
||||
else
|
||||
@@ -506,6 +509,28 @@ log.q = function(x,q=1) {
|
||||
}
|
||||
```
|
||||
|
||||
## Impact of $q$ on the `log_q` function
|
||||
|
||||
```{r}
|
||||
layout(matrix(c(1,1,2),nrow=1))
|
||||
qs = seq(0,5,by=1)
|
||||
x = seq(0.001,1,length.out = 100)
|
||||
plot(x,log_q(x,0),lty=2,lwd=3,col=0,type="l",
|
||||
ylab = TeX("$^q\\log$"),
|
||||
ylim=c(-10,0))
|
||||
|
||||
for (i in seq_along(qs)) {
|
||||
points(x,log_q(x,qs[i]),lty=1,lwd=1,col=i,type="l")
|
||||
}
|
||||
|
||||
points(x,log(x),lty=2,lwd=3,col="red",type="l")
|
||||
|
||||
plot(0,type='n',axes=FALSE,ann=FALSE)
|
||||
legend("topleft",legend = qs,fill = seq_along(qs),cex=1.5)
|
||||
|
||||
|
||||
```
|
||||
|
||||
## And its inverse function {.flexbox .vcenter}
|
||||
|
||||
$$
|
||||
@@ -516,8 +541,9 @@ $$
|
||||
\end{align}
|
||||
\right.
|
||||
$$
|
||||
|
||||
```{r echo=TRUE, eval=FALSE}
|
||||
exp.q = function(x,q=1) {
|
||||
exp_q = function(x,q=1) {
|
||||
if (q==1)
|
||||
exp(x)
|
||||
else
|
||||
@@ -525,41 +551,41 @@ exp.q = function(x,q=1) {
|
||||
}
|
||||
```
|
||||
|
||||
## Generalised Shanon entropy
|
||||
## Generalised Shannon entropy
|
||||
|
||||
$$
|
||||
^qH = - \sum_{i=1}^S pi \times ^q\log pi
|
||||
^qH = - \sum_{i=1}^S p_i \; ^q\log p_i
|
||||
$$
|
||||
|
||||
```{r echo=TRUE, eval=FALSE}
|
||||
H.q = function(x,q=1) {
|
||||
sum(x * log.q(1/x,q),na.rm = TRUE)
|
||||
H_q = function(x,q=1) {
|
||||
sum(x * log_q(1/x,q),na.rm = TRUE)
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
and generalized the previously presented Hill's number
|
||||
|
||||
$$
|
||||
^qD=^qe^{^qH}
|
||||
$$
|
||||
|
||||
```{r echo=TRUE, eval=FALSE}
|
||||
D.q = function(x,q=1) {
|
||||
exp.q(H.q(x,q),q)
|
||||
D_q = function(x,q=1) {
|
||||
exp_q(H_q(x,q),q)
|
||||
}
|
||||
```
|
||||
|
||||
## Biodiversity spectrum (1) {.flexbox .vcenter}
|
||||
|
||||
```{r echo=TRUE, eval=FALSE}
|
||||
H.spectrum = function(x,q=1) {
|
||||
sapply(q,function(Q) H.q(x,Q))
|
||||
H_spectrum = function(x,q=1) {
|
||||
sapply(q,function(Q) H_q(x,Q))
|
||||
}
|
||||
```
|
||||
|
||||
```{r echo=TRUE, eval=FALSE}
|
||||
D.spectrum = function(x,q=1) {
|
||||
sapply(q,function(Q) D.q(x,Q))
|
||||
D_spectrum = function(x,q=1) {
|
||||
sapply(q,function(Q) D_q(x,Q))
|
||||
}
|
||||
```
|
||||
|
||||
@@ -568,8 +594,8 @@ D.spectrum = function(x,q=1) {
|
||||
```{r echo=TRUE,warning=FALSE,error=FALSE}
|
||||
library(MetabarSchool)
|
||||
qs = seq(from=0,to=3,by=0.1)
|
||||
environments.hq = apply(environments,MARGIN = 1,H.spectrum,q=qs)
|
||||
environments.dq = apply(environments,MARGIN = 1,D.spectrum,q=qs)
|
||||
environments.hq = apply(environments,MARGIN = 1,H_spectrum,q=qs)
|
||||
environments.dq = apply(environments,MARGIN = 1,D_spectrum,q=qs)
|
||||
```
|
||||
|
||||
```{r}
|
||||
@@ -593,7 +619,7 @@ abline(v=c(0,1,2),lty=2,col=4:6)
|
||||
|
||||
- $^0H(X) = S - 1$ : the richness minus one.
|
||||
|
||||
- $^1H(X) = H^{\prime}$ : the Shanon's entropy.
|
||||
- $^1H(X) = H^{\prime}$ : the Shannon's entropy.
|
||||
|
||||
- $^2H(X) = 1 - \lambda$ : Gini-Simpson's index.
|
||||
|
||||
@@ -606,18 +632,20 @@ abline(v=c(0,1,2),lty=2,col=4:6)
|
||||
- $^2D(X) = 1 / \lambda$ : The number of species in an even community having the same Gini-Simpson's index.
|
||||
|
||||
<br>
|
||||
|
||||
<center>
|
||||
|
||||
$q$ can be considered as a penality you give to rare species
|
||||
|
||||
**when $q=0$ all the species have the same weight**
|
||||
**when** $q=0$ all the species have the same weight
|
||||
|
||||
</center>
|
||||
|
||||
## Biodiversity spectrum of the mock community
|
||||
|
||||
```{r echo=TRUE}
|
||||
H.mock = H.spectrum(plants.16$dilution,qs)
|
||||
D.mock = D.spectrum(plants.16$dilution,qs)
|
||||
H.mock = H_spectrum(plants.16$dilution,qs)
|
||||
D.mock = D_spectrum(plants.16$dilution,qs)
|
||||
```
|
||||
|
||||
```{r}
|
||||
@@ -640,9 +668,10 @@ abline(v=c(0,1,2),lty=2,col=4:6)
|
||||
```{r echo=TRUE}
|
||||
positive.H = apply(positive.count.relfreq,
|
||||
MARGIN = 1,
|
||||
FUN = H.spectrum,
|
||||
FUN = H_spectrum,
|
||||
q=qs)
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(bg=NA)
|
||||
boxplot(t(positive.H),
|
||||
@@ -654,7 +683,6 @@ points(H.mock,col="red",type="l")
|
||||
|
||||
## Biodiversity spectrum and metabarcoding (2) {.flexbox .vcenter .smaller}
|
||||
|
||||
|
||||
```{r}
|
||||
par(bg=NA)
|
||||
boxplot(t(positive.H)[,11:31],
|
||||
@@ -672,7 +700,7 @@ positive.H.means = rowMeans(positive.H)
|
||||
```{r echo=TRUE}
|
||||
positive.D = apply(positive.count.relfreq,
|
||||
MARGIN = 1,
|
||||
FUN = D.spectrum,
|
||||
FUN = D_spectrum,
|
||||
q=qs)
|
||||
```
|
||||
|
||||
@@ -709,7 +737,6 @@ obiclean -s merged_sample -H -C -r 0.1 \
|
||||
> positifs.uniq.annotated.clean.fasta
|
||||
```
|
||||
|
||||
|
||||
## Impact of data cleaning on $\alpha$-diversity (2)
|
||||
|
||||
```{r echo=TRUE}
|
||||
@@ -720,7 +747,7 @@ positive.clean.count.relfreq = decostand(positive.clean.count,
|
||||
|
||||
positive.clean.H = apply(positive.clean.count.relfreq,
|
||||
MARGIN = 1,
|
||||
FUN = H.spectrum,
|
||||
FUN = H_spectrum,
|
||||
q=qs)
|
||||
```
|
||||
|
||||
@@ -738,7 +765,7 @@ points(H.mock,col="red",type="l")
|
||||
```{r echo=TRUE}
|
||||
positive.clean.D = apply(positive.clean.count.relfreq,
|
||||
MARGIN = 1,
|
||||
FUN = D.spectrum,
|
||||
FUN = D_spectrum,
|
||||
q=qs)
|
||||
```
|
||||
|
||||
@@ -753,16 +780,11 @@ points(D.mock,col="red",type="l")
|
||||
positive.clean.D.means = rowMeans(positive.D)
|
||||
```
|
||||
|
||||
|
||||
# $\beta$-diversity
|
||||
|
||||
|
||||
## Dissimilarity indices or non-metric distances {.flexbox .vcenter}
|
||||
<center>
|
||||
A dissimilarity index $d(A,B)$ is a numerical measurement
|
||||
<br>
|
||||
of how far apart objects $A$ and $B$ are.
|
||||
</center>
|
||||
|
||||
<center>A dissimilarity index $d(A,B)$ is a numerical measurement <br> of how far apart objects $A$ and $B$ are.</center>
|
||||
|
||||
### Properties
|
||||
|
||||
@@ -794,17 +816,15 @@ $$
|
||||
|
||||
## Metrics or distances
|
||||
|
||||
<div style="float: left; width: 50%;">
|
||||
::: {style="float: left; width: 50%;"}
|
||||
```{r out.width = "400px"}
|
||||
knitr::include_graphics("figures/metric.svg")
|
||||
```
|
||||
</div>
|
||||
|
||||
<div style="float: right; width: 50%;">
|
||||
:::
|
||||
|
||||
::: {style="float: right; width: 50%;"}
|
||||
A metric is a dissimilarity index verifying the *subadditivity* also named *triangle inequality*
|
||||
|
||||
|
||||
$$
|
||||
\begin{align}
|
||||
d(A,B) \geqslant& 0 \\
|
||||
@@ -813,20 +833,18 @@ d(A,B) =& \;0 \iff A = B \\
|
||||
d(A,B) \leqslant& \;d(A,C) + d(C,B)
|
||||
\end{align}
|
||||
$$
|
||||
|
||||
</div>
|
||||
:::
|
||||
|
||||
## Some metrics
|
||||
|
||||
<div style="float: left; width: 50%;">
|
||||
|
||||
::: columns
|
||||
::: {.column width="40%"}
|
||||
```{r out.width = "400px"}
|
||||
knitr::include_graphics("figures/Distance.svg")
|
||||
```
|
||||
:::
|
||||
|
||||
</div>
|
||||
<div style="float: right; width: 50%;">
|
||||
|
||||
::: {.column width="60%"}
|
||||
### Computing
|
||||
|
||||
$$
|
||||
@@ -836,8 +854,8 @@ d_m =& |x_A - x_B| + |y_A - y_B| \\
|
||||
d_c =& \max(|x_A - x_B| , |y_A - y_B|) \\
|
||||
\end{align}
|
||||
$$
|
||||
|
||||
</div>
|
||||
:::
|
||||
:::
|
||||
|
||||
## Generalizable on a n-dimension space {.smaller}
|
||||
|
||||
@@ -852,7 +870,6 @@ $$
|
||||
|
||||
with $a_i$ and $b_i$ being respectively the value of the $i^{th}$ variable for $A$ and $B$.
|
||||
|
||||
|
||||
$$
|
||||
\begin{align}
|
||||
d_e =& \sqrt{\sum_{i=1}^{n}(a_i - b_i)^2 } \\
|
||||
@@ -875,14 +892,14 @@ $$
|
||||
|
||||
## Metrics and ultrametrics
|
||||
|
||||
<div style="float: left; width: 50%;">
|
||||
::: columns
|
||||
::: {.column width="40%"}
|
||||
```{r out.width = "400px"}
|
||||
knitr::include_graphics("figures/ultrametric.svg")
|
||||
```
|
||||
</div>
|
||||
|
||||
<div style="float: right; width: 50%;">
|
||||
:::
|
||||
|
||||
::: {.column width="60%"}
|
||||
### Metric
|
||||
|
||||
$$
|
||||
@@ -894,9 +911,8 @@ $$
|
||||
$$
|
||||
d(x,z)\leq \max(d(x,y),d(y,z))
|
||||
$$
|
||||
|
||||
|
||||
</div>
|
||||
:::
|
||||
:::
|
||||
|
||||
## Why it is nice to use metrics ? {.flexbox .vcenter}
|
||||
|
||||
@@ -905,7 +921,6 @@ $$
|
||||
- This means that rotations are not changing distances between objects
|
||||
- Multidimensional scaling (PCA, PCoA, CoA...) are rotations
|
||||
|
||||
|
||||
## The data set {.flexbox .vcenter}
|
||||
|
||||
**We analyzed two forest sites in French Guiana**
|
||||
@@ -926,7 +941,6 @@ data("guiana.motus")
|
||||
data("guiana.samples")
|
||||
```
|
||||
|
||||
|
||||
## Clean out bad PCR cycle 1 {.flexbox .vcenter .smaller}
|
||||
|
||||
```{r echo=TRUE,fig.height=2.5}
|
||||
@@ -934,6 +948,7 @@ s = tag_bad_pcr(guiana.samples$sample,guiana.count)
|
||||
guiana.count.clean = guiana.count[s$keep,]
|
||||
guiana.samples.clean = guiana.samples[s$keep,]
|
||||
```
|
||||
|
||||
```{r echo=TRUE}
|
||||
table(s$keep)
|
||||
```
|
||||
@@ -965,7 +980,7 @@ table(s$keep)
|
||||
## Averaging good PCR replicates (1) {.flexbox .vcenter}
|
||||
|
||||
```{r echo=TRUE}
|
||||
guiana.samples.clean = cbind(guiana.samples.clean,s)
|
||||
guiana.samples.clean = cbind(guiana.samples.clean,s[rownames(guiana.samples.clean),])
|
||||
|
||||
guiana.count.mean = aggregate(decostand(guiana.count.clean,method = "total"),
|
||||
by = list(guiana.samples.clean$sample),
|
||||
@@ -1023,18 +1038,20 @@ xy = xy[,1:2]
|
||||
xy.hellinger = decostand(xy,method = "hellinger")
|
||||
```
|
||||
|
||||
<div style="float: left; width: 50%;">
|
||||
|
||||
::: columns
|
||||
::: {.column width="40%"}
|
||||
```{r, fig.width=4,fig.height=4}
|
||||
par(bg=NA)
|
||||
plot(xy.hellinger,asp=1)
|
||||
```
|
||||
</div>
|
||||
<div style="float: right; width: 50%;">
|
||||
:::
|
||||
|
||||
::: {.column width="60%"}
|
||||
```{r out.width = "400px"}
|
||||
knitr::include_graphics("figures/euclidean_hellinger.svg")
|
||||
```
|
||||
</div>
|
||||
:::
|
||||
:::
|
||||
|
||||
## Bray-Curtis distance on relative frequencies
|
||||
|
||||
@@ -1051,15 +1068,15 @@ BC_{jk}=\frac{\sum _{i=1}^{p}(N_{ij} - min(N_{ij},N_{ik}) + (N_{ik} - min(N_{ij}
|
||||
$$
|
||||
|
||||
$$
|
||||
BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}}
|
||||
BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}}
|
||||
$$
|
||||
|
||||
$$
|
||||
BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{1+1}
|
||||
BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{1+1}
|
||||
$$
|
||||
|
||||
$$
|
||||
BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}]N_{ij} - N_{ik}|
|
||||
BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}|N_{ij} - N_{ik}|
|
||||
$$
|
||||
|
||||
## Principale coordinate analysis (1) {.flexbox .vcenter}
|
||||
@@ -1086,7 +1103,7 @@ plot(guiana.bc.pcoa$points[,1:2],
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Bray Curtis on Rel. Freqs")
|
||||
plot(guiana.euc.pcoa$points[,1:2],
|
||||
plot(guiana.euc.pcoa$points[,1],-guiana.euc.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
@@ -1100,7 +1117,7 @@ plot(guiana.jac.1.pcoa$points[,1:2],
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Jaccard on presence (0.1%)")
|
||||
plot(guiana.jac.10.pcoa$points[,1:2],
|
||||
plot(-guiana.jac.10.pcoa$points[,1],guiana.jac.10.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
@@ -1139,12 +1156,66 @@ plot(0,type='n',axes=FALSE,ann=FALSE)
|
||||
legend("topleft",legend = levels(samples.type),fill = 1:4,cex=1.2)
|
||||
```
|
||||
|
||||
````{=html}
|
||||
<!---
|
||||
## Computation of norms
|
||||
|
||||
```{r guiana_norm, echo=TRUE}
|
||||
guiana.n1.dist = norm(guiana.relfreq.final,l=1)
|
||||
guiana.n2.dist = norm(guiana.relfreq.final^(1/2),l=2)
|
||||
guiana.n3.dist = norm(guiana.relfreq.final^(1/3),l=3)
|
||||
guiana.n4.dist = norm(guiana.relfreq.final^(1/100),l=100)
|
||||
```
|
||||
|
||||
## pCoA on norms
|
||||
|
||||
```{r dependson="guiana_norm"}
|
||||
guiana.n1.pcoa = cmdscale(guiana.n1.dist,k=3,eig = TRUE)
|
||||
guiana.n2.pcoa = cmdscale(guiana.n2.dist,k=3,eig = TRUE)
|
||||
guiana.n3.pcoa = cmdscale(guiana.n3.dist,k=3,eig = TRUE)
|
||||
guiana.n4.pcoa = cmdscale(guiana.n4.dist,k=3,eig = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(mfrow=c(2,3),bg=NA)
|
||||
plot(guiana.n1.pcoa$points[,1],guiana.n1.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Norm 1 on Hellinger")
|
||||
plot(guiana.n2.pcoa$points[,1],-guiana.n2.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Norm 2 on Hellinger")
|
||||
plot(0,type='n',axes=FALSE,ann=FALSE)
|
||||
legend("topleft",legend = levels(samples.type),fill = 1:4,cex=1.2)
|
||||
plot(-guiana.n3.pcoa$points[,1],-guiana.n3.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Norm 3 on Hellinger")
|
||||
plot(-guiana.n4.pcoa$points[,1],-guiana.n4.pcoa$points[,2],
|
||||
col = samples.type,
|
||||
asp = 1,
|
||||
xlab="Axis 1",
|
||||
ylab="Axis 2",
|
||||
main = "Norm 4 on Hellinger")
|
||||
|
||||
```
|
||||
|
||||
--->
|
||||
````
|
||||
|
||||
## Comparing diversity of the environments
|
||||
|
||||
```{r}
|
||||
guiana.relfreq.final = apply(guiana.relfreq.final,
|
||||
MARGIN = 1,
|
||||
FUN = H.spectrum,
|
||||
FUN = H_spectrum,
|
||||
q=qs)
|
||||
```
|
||||
|
||||
@@ -1174,7 +1245,4 @@ boxplot(t(guiana.relfreq.final[,samples.type=="soil.Petit Plateau"]),log="y",
|
||||
names=qs,las=2,col=4,add=TRUE)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
## Bibliography
|
||||
17
Lecture_cache/revealjs/__packages
Normal file
17
Lecture_cache/revealjs/__packages
Normal file
@@ -0,0 +1,17 @@
|
||||
knitr
|
||||
tidyverse
|
||||
ggplot2
|
||||
tibble
|
||||
tidyr
|
||||
readr
|
||||
purrr
|
||||
dplyr
|
||||
stringr
|
||||
forcats
|
||||
lubridate
|
||||
kableExtra
|
||||
latex2exp
|
||||
MetabarSchool
|
||||
permute
|
||||
lattice
|
||||
vegan
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Lecture_files/figure-revealjs/unnamed-chunk-18-1.png
Normal file
BIN
Lecture_files/figure-revealjs/unnamed-chunk-18-1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 40 KiB |
BIN
Lecture_files/figure-revealjs/unnamed-chunk-19-1.png
Normal file
BIN
Lecture_files/figure-revealjs/unnamed-chunk-19-1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 59 KiB |
BIN
Lecture_files/figure-revealjs/unnamed-chunk-24-1.png
Normal file
BIN
Lecture_files/figure-revealjs/unnamed-chunk-24-1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 54 KiB |
BIN
Lecture_files/figure-revealjs/unnamed-chunk-27-1.png
Normal file
BIN
Lecture_files/figure-revealjs/unnamed-chunk-27-1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user