From 050956c01b713e5f3d2f6d686a57df861b73bacd Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Sun, 3 Nov 2019 13:12:58 -0500
Subject: [PATCH] Some corrections on the lecture
---
NAMESPACE | 1 +
R/norme.R | 16 +++
figures/dist_hellinger.afdesign | Bin 20272 -> 20470 bytes
figures/diversity.afdesign | Bin 37400 -> 37532 bytes
figures/subsampling.svg | 22 ++--
index.Rmd | 139 ++++++++++++++++++++------
index.html | 172 ++++++++++++++++++++------------
7 files changed, 246 insertions(+), 104 deletions(-)
create mode 100644 R/norme.R
diff --git a/NAMESPACE b/NAMESPACE
index 835f9f3..1d0f808 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,5 +7,6 @@ export(H_spectrum)
export(exp_q)
export(log_q)
export(mode)
+export(norm)
export(tag_bad_pcr)
importFrom(Rdpack,reprompt)
diff --git a/R/norme.R b/R/norme.R
new file mode 100644
index 0000000..5d45a23
--- /dev/null
+++ b/R/norme.R
@@ -0,0 +1,16 @@
+#' @export
+norm <- function(data,l=2) {
+ no <- function(x,y) sum(abs(data[x,]-data[y,])^l)^(1/l)
+ n = nrow(data)
+ d = matrix(0,nrow = n,ncol = n)
+ for (i in 1:n)
+ for (j in i:n) {
+ d[i,j] <- no(i,j)
+ d[j,i] <- d[i,j]
+ }
+
+ rownames(d) = rownames(data)
+ colnames(d) = rownames(data)
+
+ as.dist(d)
+}
diff --git a/figures/dist_hellinger.afdesign b/figures/dist_hellinger.afdesign
index fb6838d202abc4251787f489ff496e8307cbb7c9..ac8d3512815547ec68382d9ff9eb0a4fbfdfd184 100644
GIT binary patch
delta 4436
zcmZ`-WmJ?4*B!bWX{02CfuTbjIwhr>85(Isnt3RJA*4}CK#7Y=r!**ybayw1Gz<*i
zz3+YBpWj|X}*5(!p*T`wRtCBUlD1(3Lmbj
zub5sI1{Ad9f-Gy1*6~kazqYP%z&yO0SQY|#8!3Z^U26l(c>Q}8y*sR{2Z$QkGP@)j
zi)4L-Ve`aN#Y%;BJ7Ni{_|B?8H$_Y=lah@=ME2#-?5LLL++o#9Hxs;jmefsw;vpQ`zR{=>5ZNr`2rGt{m%;H#a(9G!a9N6d@g
zvDuUyQ{&@TJnqUFj$2ntdJ_y#%AF_?K5x7&xQ4k-)$Ek}C7KYXLNQ<3twqOQ@|4kP
zN)2r8!9fP1w*_7+Q7bt$u15lYe&@-0J6sn_|8(PT4@RTr7JhE;G=QZhph88UdNM;Dt}FMJ|`RM{Bf=+F_CeSXmbQR+>
z^6scDmDy2&*_hFZ6EeRRhv5i7(epE
zY`9~YNp?2O-wm19Xi$eVTP?N$nu}-WihjB6Xq7+931<&?%17myED86v5_jc;W<2YH
zCg@~Ta}zh>rqwH|V+AjQ#X`_`%yeBD2s|m!DGRU}W+D*|B7Jc#?bEXLzxg4(UO$~lNY8&ZH4$E|{HA{EI((kG6jd~EmSTqlWNHs|3skLHl7BKY9G0X-wZMOmJqP;xrT
zfy%bl^Oim|+JS#HBjl{3#@rCUt@#l)A3Kk+)c+Cgi%fT8E^zz|@+5Msesh@OWl`s3
zztI}hacGlya?H|pJO5|qGF~K2lZkgqB9bNe`gA)8*p8R#y5II}=VQ2Qs7CH|d~s+&
zzTUqJ!F05e0nSrn)>X8soF?m?u4>kAulX4)BNdIGN9(YeT{69E1M2)n8iuXNlz4Wf
zf(!Q-{q%eGmfoZ`>YRSN-tf5UP%kgi)ze@*zj=QZvJ)NAfXqWQ-uE4={~%Qnr(geMWOu6`Ot%6ORW>EF?Z;uNuOB=QhVEM%%F2kA7totz
z?tKj*o;S@o{#Wx;D1GA<39FyI4;3bPH)lGucf~ht-;0WNA|!du1`0BUhxpjeF{z72
z26{(1vzfk|#TJc@*xbhukh599dCE{@E(NsgrdJDgIu1%?FINd(j{5|(PrXGuKFrbAWDx|Dw88jIcU2>B?ol(`?%u;U!(N19u?>P_?XPB@amY+bP&JB
ziL_MA-p^WqU+8^`VPtpK?S_&D+-uKt?^upBNTGYSX4+ZwieR
zkW?wjA)hRS3FY5BAN8sTk5^+%s`vrCvngfn9_Li1S?Dj+pvdjnT|F6c>lLV+bw@?J
zL!*Dp+>OOmR&CWh^D4(9(Xb_;=cH2%?-dTGtcw@@R@gsKY#*N-;-ELhQB|Cr#_+RS
zWD!o{1YHT|YAH8^uhVHh391}V6*tJNmaW#WQMnZ?-q5xdBwHsX#gbuSH-iG$lT_0Z
zm^J$}j0Uk~=8||}dqx~`)Nc+7r}&0(G%Byi>IxmW-KZO050n>_6#neWR>&&I{L=RP
z3(2O>l$~CWNz<2O2+cLYvZ`iPWyt1-1el64?q|#ad&Wn)P(j|9c}8n@IY<3OEfN?UYx%jBi3hG0i&2R|>fDeDFhFc|(dk%^4qSdL
zw0tCWw=Wi?j4pZbLN0o9{9qJmh;`4iuZHI=DZ>tcTP5~d+lKeBhUu6AY`s1OzggK{
zm5m@9tTlT3&1hwK~Gtz`jw}GlM3)mGM
zZQ8G>G;u5xws5al%?PlK+096?g{~KZ2Kf(GLUwYEq{;8%z5!S2K0An7Qvp7#-XY<>
zL2<0Fy53H>vtz&81=>*wj_y<nCtGYBVvzv#91`Uy+#$^^V@XW#MV^P
z$EY2vWp3z6!ZV6&o15d~(T$+QMd&d3^+GT`OLMp1Bx>@)R<`-(Eo09Px+?EjS;JXO
zKQ95{|Hnwf3fOGvdB`pEL;h~LLK~BjFp=NT35#)wwpL`+(a>%-mXAVMGv%EU5@2m4
z%v~~KGNQ2J*D4BqKHCes_I>t!xo%Dxfon*AkJ{@)dmZg0tT4f~El)x#0W*qgTChd}
zxgS@$J!E{b6lSFoy$Ge8upg@R{v0bnwK}mS(B-%WV7!7y!j)vgn8EeyW2adxxU}mO
zC+9?_Ig+)-?|-l$x#@ch;{&D+wCX?UV7||YJLr!rW@wFI@W1Q{b(q&MPY5CA>n?gz
zQ#5L2GM=`Tl(lu@z{(O~%3ZC#uE}Lk*{W-?pp1|k49hie2jg_ZQB;Cic9{r~x>`rV(*TL{iZ
zdovH23AerBOlc`&bTDR#^L_D+yEB$+UJ_Uom7hriS;iTV$gz6CXj!MRG3^7K<>*W+
zSrHqai9|1~hmEj0v8B6i*}7EP%gI@5@ebdmgZrq7rLXDdc=XBrRVr7U7GVr7m>8Zk
zW+PARtzMQU-lX3iYPTg-ryvGS-#>l8NAPxpaXaGv!IlaXek%KsxEB|6;@`-TX$N3q
zO0DlobPe?@g-QkBuz8^Jq;aP7%CSCvHz33$QT0@i#$kv9XUKBzlBbpob?wZdHcOO(ThSH`;^a0-f8M3>1rj9)a|^euZT#^FFd^H
zVv6?CH8X8_7ztpY?gmrXxmp24tP7xS%|GbmdB(sLC<-?36l1N$NAvUuK7vI5z|xr=
zG~?C&QXr!=h#q0FW;xA3)T@)j=#bUUrekzOefx!;#&}tHoi9p0i-O42ePBzxf5DyN
z#FLbN;6||fLBOe^&QH_d(rBI7IkqM7_edV1D0)bvWqH)o46j^3@?_zKwSWe7;*M*I
z!;5C?Po3%sPWWTadLm55#<^Lh@OB`d(ep*I?7*R5i_P01x?o(rn^!8ocp8qO9<9>M
zW0|4XF)JmjD$dZ86|71}b2GAW3IeN}j05g};{YJEVvO2_N^dD+~Hu`$Q@G
z5r_X_5*WnKuk~|$Q^!blz6t0tqNmsMrHYeXxwYBg*B&FumItKPmxhk2j}#Wi)}P0y
zGHUUPon2^sglHEzv{_?QcHdNgw+RgYb#Z1$Ri}8=+j@D{yKD&VxK*Zjcq6olZk-`*
zvAVddImLs-jj|~LVd)aF!Rcb|(V_S0V--P1J4?$VR&9I}`@yv_vVigCL*`8XE+nX;
z1wAM6-ts~kf_qyL|M(I;)6*-^vVHM_Fkf^F;)C!&zgY|T6nvyF+kTdO)k?d3zcBkn
zo4xJ(o`#&%eu(qK>WAQG6f6Ae8nG9@l+m!NkkasC0jO1hMHeOy|@w5YceRb
zQ(&_1RiUiDHJ@nwI80O6H(tiqKU1rdvr;7RTOGboqfscQICW>)YhF;knjldR8GO4p
zQ&-I4A!&CqmG?E7N$bjh64G3V#9{x&lbASHk+8&8wSWob0>}$0v0N@8sj-7XaEOhN
zQ*$9I*iY$Fd77S1O!iTBBaNp7iJqR{Do}PWY(LSlf<={AOA>bKOx0;iu$#5D{Bgh8
zF1IZ6TSQ>^hwlzj#TpwK2AwnUx?^-h%WB(S`!xHh&bYYz2KKL3?Q1RnaN>8ZyT
zifg<;iO5ZQr$+jD-SbM9mIHn@)!%^`i1W^{%3PxA6%zax)SsgH3PieCO}_PR4G>M@
zIAgw`0amjy{Nb|@#$Dkb7ARtT@uY73UtM$HHy!MOr}0GN{T+4HSzpUzUF?x@a>6(=
zrVu-Wh;B%@%{FFat5#3YXL6qHa(}+B$^`B5NDbI62L{sWtw;*H7Y(tfy0_g#Ci$=O
zvu%;2h;guSo_La|86z6iuSimE_o=R6XS^>KP~*xc;gX2Y$`YQZh)X2xHs)q%ImkUY
zbvZ31Zm~45yh(Gno7psHoK$MkF~<^2F=w!d?`C-AHcOps8CF~RUBIoaHbD7DV9DT?
znIqTjj8YM-N(xp+5|NQ^Ix$}Dwot$A#+N;@Ef-fAc(I@H=p#*{qm(CjjD|{T57HVPIKxbZ^2W;(wJe5XcRtgv9=l?-Z(DIPoEKzIQX@+;@n$w5
z7~TZC0vN4f+B8iPI3gV29*q}Cph>H0}o{9VJlz<_xb0Nfs>m}O(gj@ng0Vv
Ca%zzP
delta 4195
zcmZ8lg;UfE+ua4cA`QaQQtE;tf(R@POGyZb#7c=EA+@wi{^*7UrMsn;5D8(WLFrsV
zx&&eAkly#+dG9;l%y(v<=Q(HQ`~l}VC)wbtEU*%Ype}z41Ohp_x@hyLI$A+!{uY13
z=il2+{a^jREwXHA{0}GkceGuQZp86QP{8WFa?^ZHVz=bOp1&&aBRkK2j^XiE9&gnp
zK2GV~bT-zdWppx=X&3hsD)JZU*(XP6>o(kNT{{pL_-cHQtzyp^#sncW;rh?%B*7eQyf$u{!!CS%`Tg>%G);HdFYEvBKD`%3C9i
z$~EaNzL`>xGvU^V@0}|`KhH?pMV7CMasL3}Y=zoH)~|hVX4ghqV~VAQixC9edA1W6
z>qllK!ItM*^@^B=*{c2aoeZ<|SE++tMP|H#*j0Mr^x5EW_L=(1`RdGa`B=KZpklW>
zi+ohe9!F*jl1Y+pRO-#3aIFqmsr|Iu6)I9$d)e2z*_XdFkT($(I8H)0kq
z`)94vL|D`h*P&JThx(h}9(g{X*u1~HK)((&Tlij3F2q@lmAIfv@is6i7IRL*7^6(U8A>5vf^V*VlnTMK=)4KZ1NsMY!w&&^2;pF_?xq81Hj%NE=X2YlhPcr^OEPZUG7P#*IJJ(wC`vT7xV&xt9QZs8#R&p_*iG
z%djKJ>3$x9Cr4m}-9$VH`{mru*TQ=NA|A_QYMs*p9l`0L7QvNwSHti$)0X0myB^F}v{z!{Mm0|h{vv=Gig@m@zv0J)iF42z*)uzG!bglXzv~P@_TTnPl8eg4
z*F=}Gxww0!Q?I^R&;_Ogy#Eet?YBs!S(*J+V43+A
z$0`+^&geIk=FEzRB;dOe+OI^GILxhew4X5jrq3Xc?$YN(034G;)J
zQQ>WWhQZ2?c3(Cee7+vhvS6UzpB;BI$5JCl9darKA3t6d()*~7Ne^7MV)zPxD9P4UObtcl{GC#6Oh%?f5JEhE-VkPYSh(+ZhGhzBq;D@W*NXY
zZv5!Y5zdfHws%?Gw24)gMQ`(n$^|=xw3Rc%D??1fZG>0zh`-Vb!vpTaRmawRH5t#d
zd+Y(fp3cmA0F4Zx5nY6}k>Ud7l&MxRxv;-Fo%IW|mJ%2&9f5AAyd!ehlMf2d27IM=
z5icMETy;hGP4?ag{e^ESUz8MG!O9A&8ji{->SZatl_t?4guPA%mWfd(Y#gC)UK7H7
z4hj)~DSyhK5lwIs$~og++%IpZP-Ts)06JpKOIW)`m6YfvdkUheB|=8pey7gFDwsLX
zouH+>s6JJm$FVVpOm9`Uh19^Y5;dSV`Y`ws1)p#i>&y3OJ@8
z)|{Fm?YBYTwfQRSmsm*d8^wTLBw#-3SHL*z+oWX^KBRqgD???dWU_dv8FDvrnIl3m
z>FO)Wgt(HK=sqTOgW9VR3fZ>xa6X7fEL#n2e{r#yu3Uq0K2hFH{N2&TPA@v
z7y}jIe)b=h>_vg6!EYVzzT-_$AroFMQW{x=CzjP5gTFExkshxAg4tH`Eek(ls)EzL
zud7#Ij9NGfArza*qYda^={+#5x{fBH1(Q4YCO)#(1xs3Mw3`#GmPL+rfCCTskt2R9
z2w!P*dG$^>JZGUNSdwNB1``Bf9SWvLL6C{1fwACik0~m1SLOQD0ecEd{2q?O&++i_
zFMQI6xd4SqlO~mA;Al6w_>ow(m&J}~4~Lt*XiR+*sr@xNM={ug?SzhPf_a2lfy|SM
z>ihbXy^mI&tOSPtU_1gCt@K&D1kQW9R$vN+8sYWVTkp4a(q@c
zq?&`+XI83Dl{Rz`VqH-!Y_sI}c9nN|s3ik|3y9*9B`~gp{+tfCd+GX(C%>%neSz!q
zy(es21)^=bt&ema_dxm&0~TG%lZfRhu1{vYwtoJZ_pQfKk`MsiEnm91uN=svzV=q3
z8j4GofmX6rwlr|jx$h+PuGiu8OyO(|p>D5n{aU&GysMqPeE!RJ5?cRC-I-VSD)Q&b
z#@kR^!sSEn#G5Uq&^HGBK-0s6&a93<(KTe34%ld|Sr#d78Q+d6G}3dwdw;s}}ZQ2F)a6wlW_hbSTWB5Aon0puux$WrqMl#I+XH_@6
z!nGmBG?Z6;+g_og`c@WVf&Q_KSTz({{F&_@mSvzdT@m^hA#i0wXnDx;0FOzPy?sZ2
z@g&eJ+m$$@z;`xJOd=4(a&pd`M~i?(I^~1b;_xaNl5`@)CcMYk
zfmcQa0Dl1Gux1pNvVfUOy%V~g@%NXABK!1l`wIB>(`o4?BiBlSf}4n_6`bcZDV-SF
zNtG{!@^VR~$*Qqt2cf2}5d!Q%Ou?x0a^6EUjoG!TJd=mXC-%S{o5>abnqqnqmY5|k
z57MW!9cE@Mg`7@>n#SB7%Foe(+6J~5b-^NlQ(ivhW&5D2AQ(J8OK0oIEK(VqW}xh;
z#;e8bZg|VxkX8xOy^obkECtB({rMkN7Nlgg`DiFFWFuW$ZQ|fat6)Ccwx=D=SRQd~
z^sAf!5za55Yu;whV&h@9QsFnd%zVZ@dXMPGY8jLWbF4w;zt06WAw>-uS)-Y#x7g6QS>FCc%w5wYiY8K6
zfgE|0tAzNqpsvj6u9CK!_kMIeta({`p)St7*-`>7y3J#2nkX3trsC8
z_kXN(Tg%Z
zeTKiMh|jd6GC9#hs_2EBu5`J_`3t2@*iNu!U`>tH2E2s$^BYX-^rOqC4O;+2t+tq&
z58N$Vmd2oJYVlLo`QyluWu&5*73w+OVy&K=T!5?5Cq7Xu$}LgaF$i&p8)<>etxGR6
zSI%bI9n0vK%f2okRBu?G8g!O9qINL%3^S7c`CE~TNJyKvaEb2;qWnh&;
zr>?o;oWj(r`pUvGR3WuQ;(G?wtu^LelWnH^1=&zE7%o+d&(jv1#;{uulk|6|bi
zRdc~mm316QF3F52jH9z4M68f?%b5O4`n$}pH_N40SC>00ry;_}K2O1+gZYXz5j@(l
zSGlZAdFb=E@LNIOx|tuox!*Jto)dhVOliR}5~7#;I%~F_zo3eb>BR=al#9#vxs6P{
zy65|G4<;qLw!XM{0xNR|F`1G^yVw2HeO?DE-HmdRJWZ#XBv2I)oGVfrk(_CBF~#40o6JUSLDzt|4|JMH$J=%)N303{_#%@a
zrG3f&K~UvG9C4j({?zRovdY|WYa_&x;0c8PYg5#d$BFWs8}r3vLc&STMuCy-L@#{a
zP}{qgSBzaQHMARxHA{ytoLIGg*6@54%8U}XFB?a;^-0c>Wyj-a)_9U8l!6o3!7amV
zk?%VUIuzm^dx04*ifix!CHOs+#IgpxG@fO*@#g-9$f2f
z6ysD|nn|xoib|Hj)y1xq9oE4VeLtL&T~Jo<2}>DB*-bqN9S;k_P_mkrY7PDj-5vd<
zo}MfMlpmk_G>dqYBK3@KF*Ye+YTvXvyObFk);Z4o#JYRBPV69Q^(&00Kl2aMk@DY&
zEIgwo`F|oG}$>
zB)zmYeplFhpU)%Hd@>p)$g})%)IHKfc)2y&I$TS2651_*>^c)NGg(iQr8dIf;8B*>
o{r}(o|Fr=h&uQ4WLEwK3&;E5uiv5@TuPWeuNPbWQqivS?4-_jOSO5S3
diff --git a/figures/diversity.afdesign b/figures/diversity.afdesign
index 807a90415e505b4fb65a4231d594e297b9ef34fc..5491315d4d496a660303ae7fc3569de7f173a1da 100644
GIT binary patch
delta 4939
zcmX|FWmJ@n*2SSiKw=m=rKGz%g`tO*M!Gv68eXZPyPH7?0i^{&8l+o58l-DLz|VKx
zd%wNT*|E;g^KWODgYqjt0LmXVS!@&(6jv`#Ek;$>w{t{)!(WIK|3{<$oBxwgP{5BC
zrvG}hzYF@iTAps+7=M*f$-&wG*|GR@e_dGSX*a0l88y06KRFMHV(M}xS?F7&lUZmk
z5p-FZ$XKS}KDV>Ak(+ySrviX>;IWTPHVh>*PP}`^utC5h#>s!gIzb5W^)9`aF`8eKXQt)xOEkD_5P-S>7CM^6cSmyDbblY1>;1nh5wyTwWK<+?J)aOoTC)
z$gjSA7Paab?38_6b!y@6FKwK=p?V=$pY`3Da1B*i2FQVS(Yj8jVk&nMiIa;ZcShSMKI{cPZL0K{Czz29V6B1tE_440P6
z!K;2@Q9wVGC(HK9cnD=PN%1WsZwsjHEBOOEl3j=D`~qabMChD3Yp>aGCiSB}MtrF;fpLZEr%pGmJt$K}Dtlf}J58gU+kU_{%`?8{KcpO>^R%z;A?8
zR-y2iU3)lW8>~tnqhgvCp70Yb^ZB#was&hqwaJF?X;T3JNQqZ7XidHNAVn>F)B6Jy
zDz<_kZHYu!2@k2IMua{e+WAN$ny4I|bq=;Z>-gf$=$;!gm`(a7+J|zqm2)q-$k6od
zlNG1=#h_n^aI*R2?T#u-%W`-h3%r1(rkW!ejKq0Wy*W
zHR=AObPK5b%y)RNeL1g{#!K%E1zWtiW1?L1(!dw1W1`LL((1=frH@V6(GzuM%(6%u
zFb*&6U*2ixxhW&a8#|!QC5zcK7OamiKI4C?UW?AoSBr?imu-F_BOI4&Py4Nt?=tBg
znoKSHk;Bs4*jj$vH<$U`RG`v0++f(zj~And>?{w+D=vg6J9g_hsx5u-d7EnV!H%3W3h-2ri4W==Ed3PT6v!BLPWjcS`vpiv+
z?lbV~U3H~M4J)Gn%lq2+=;L(h<&ABx<;jLXygf`EG0piiK$)*
zW=%RBwt5Jh2pl_k$7o<>H(m;(ucv;;3{2e?ENse$sp?Ysma77g?;-8B)&A(wLJZNUq1_s@N
z31@y6Gzsn5v4bLP0JRlouuS|ww7bCtL%tLzPlR|XPV>qZ
zM6u^py-RE*a<&|rL%RKgQA5u6Iz`_R@ACRSjM(I2=j}ffX)wBV4M-*b!50}VlBjGF
z0ZgMqjVX0Ug+tK5)5`=6@XW~!FYwV-HbfF>K5XNRH3H_KXn~gz;oeaY#YD(vM5GI{
zdU<^p3Bg@~#X9!x!mD*!axI?MB<1txkvsq9qn|i5HnAqw$7Fn%g#mj;~d%KN(mN;SG553((o+a
zFSjzDj^7xh_h+|7co*oX3bM<$rW?=J!AWVE7bXiFh&t}|u_FHttNokzM@X`{0zb~~
zw^TTcw!b
zkEY=shR5FM^B>yq3J*0dyAZKcR96Z24AF7XNjM?b4fZvfN8aUT&RYj4#ju=#5}N)N
zEM)?QElJZ@x+|LY0DPq9IfkIBTkk@jLq3K=r3R{60mgEPfm0E1AoE_*w`$%NbP+ip
zZJZwT6gYDfJ(G2IarVbZ%H|pzy8e#~bkft$I%J6Q)a78U)HJ#Ld#OzwwvOGs1GLco
zel$h-8^46_fcIo_mUEnR?Mr*~C!vaETE~0O3EM_HZY(Fh7f2eF`TnqnD+_1>mNdcq
z_9jdBtCBDg%{__tRkK8KSI8pm6sG|Gl$2k7_IUxz$7!mHAJE4dB!
z(0xDE5e1SNvo$DMJBuiDDT!=X9@f#Fz>73^kr}lA$2+N9EPPcT!51oBJmM7xg~YWpP{er
z0u|8yFmI7cP`nYd6@|Vyv_A_lQHRPepR0&ct*Q6>+1H;i|LnjzJycXz);eVd3AIlY
zk@2G59n2|EwYMZQTQR2|Ix@fm0jR~ywRhuVOQ`|ra=MgmUGlhu@y$YPtDYME5afG^_5D^Igt#5A8457B7?3kK4EM5^qO
zPlqfPS$P2LZ3IyrP*HKYVRlzspOwenNA~(j(H=P+CRlpBq>j8g7}jnQNoi{20^EKZ
zmsupR>IkJjJF_QWGrMvqy}-W>3_X3l@Ra1n^%J<2a-hDQq7^#he)L|=L=ZRTb4Qs7
zkxF@0|LCo{q1qT_Hu#hh2>AT&2Dzwj-u5VHm)H1iPC!qS+qhF|`6zXck7B5ib8w$t
z+>L9^FC|CXExqsdYFnvgyJ~K@gJ3S;b~`s|;%b&pMWZQa3%V^HP9ka{@dfU5;Hx6D
zkAUi_c$Z8Ie*ypa#L~EJn&Z^%@j8{ypnRb&B51oEk(B%CT6^skTyMcZR7YCZ$Bohh
znbXv+Oss;VYrOWSRaSU6E%Eje6$nZhd)68hn6{_4C1W
zR?#SYYj@23h@rVh-nVxFefp!ahFxeJ3_!Eaoetq`9s}ait<(5RE#oT$P!*1!1nYs1
zlTViizwcmfW>Q-}MoeR!aSEFg_9f8zK2}lTCeBXI%9;E6{~2pz0n%l3vZulKwVT%8
zB%YSv;r7aH8NJb5p#@1L4`@UxVPPba2;5O_emR-oV=QE_cm+@jOLmNtyNcY?|JDMM%N!e!DIL-s@e6JDy833xBWXQ)R(d-;VFD
zS24xR>auxnDh%%y`$t}Khj`!hQcGNRxSTpWh`Rm=8f_I^r@8+YADawy`d#&!Od4*b
zzJFqW@nFlPuD<&ay`g^XeYH+b2V7j;9Mcv_1UM9-D(Fx)`g^a=^z~8ny&?zSAk4p4
z+L3klIByl*q<8b&?^nduYY3qK@PV#26#(wu8wyLs1mlc*t`qSsi7E*_a)usT-HE8)
zvPP!ge{s4es2n-$A}}Y$$Xs2y0s^-Yy99-~Pj$c=>yH+-5bmp$-^6{hK*}wZsIUEG
zL?b2*5J%oCU1?59_3*wG*LK%QgZJ_>s4(Dx>6cYM-8{l-R)fxQxv|IC6kJKwH
z_@OhMo-4hy&Eu3Q5P{Q@mD^dDT|e`?!?Co?g>iZFj?g1>H>!c7&~Bt~2lEFiEf!_+
zt$8RJINwSUeACH*a@@`U>}|Gq?&w@D7nFtCJ>4q5HAr5R7TXkAAn;@$H;8+yeAm{JpI-|12ceK0GdRFv!
zN~U%r@N46_ltL~ce7oIrc2$f8Y0KS4-rJ1!J6S*hQuh2%0nm*ED3aL1Z4%Bn<0`Of
zAeKXoW=AEWy@s2v9(_zK8jBFme)NQ4{?>aNODvK_p?VIUFc#cPx-bZz`wib_mreTa
zt+&*vCJy8+8QSj_wVq83l_oEs`K}
z1okjUe|ng(PRRlwdPPv&W$B|J_{^Lv;f;L_20v`$Qg1U9;$9g5+94EwS5GzU#JDbB
zjHZavNC-#K4p^*|^9K!BeQwkDXGQt!JnZ^|6-{Y@ArpN4-H>BwO=W7!V1E^X5+;^N
zdaQizb&3o>K57UF+b6Gpiws|$x)>%o|WY~
zemSKo8|CY&MIkn%jE*J~IaAzQP)zUe9j>7jN|C;f-Olv9oeAAaDZsr;rl1KZ%STcF
zn594VP+1ME_&%3;zC)p~&wuPwqF-Bm-1O^AD`YSd8=TCt#DHXEc(RQspEC2A+n$t;
z&sz^T5)zS?b9s9m_GNmJzSy26w3;t>5L33C(9FSa0-+$e*X+J14_X9Q_qVcrxBBW>
zGfSxWi97lY?pihj8`E_5Aj442MXR!IZ`PF~wjw6L>AQHW(D%9v?Pc+or7kY<8cda4
z{_ugxD6_B4om_|l0q+9^ipFJmL*CI{+gQ@7h)Gq#@8olbCA2<-iAmN?Mr4ltaeiBo
zWupm}pVYI4Hd>84KQrq*e^`)NnkCAlE}u(E`Gr3B`jv#xYX1vFBMQ4+8R2XfNtCY5
zXPB2bAT&*skX64xLOF3$Tg{=rwd@OR6Sr^k$!3OOw?nAOwCV^3vok&>1>61efe+CK
zkz|vCq|Jh_cF&v+M!1_&zq%2JVO^*NC=Qo(CXHGNkfT0yissPr%sj>o$Ld@Q%WU&>
z*=i_7Jq?>`&UCz`5W{Qy$~}{guQn#JWfjK+gmxU$c=4?}(%jr<4mq!R)!aXktQm}p
z;Q(vFm&+kV>YY2%W03F%-<21am|lRC?^0~e;x-d49{>BpnqEXfq`&1Iw^@+)0@-e(
z$@&$v#tNyqyN&}-+l|x|b|s48d)TYFN9qNs;;g3}7F8}331U_M%>O0mzhk@LU>gG{
z<_sGT#$kz*KBjk=Qg}vM9lS=3`fRtPSW?`Sr)Dh`C3l;Gu^dO^z#)Pw(5X{5n*Xys
z|EH10aI69GB+{s}B>PBx?<*MlUPE?EgTI9hiKVBkdCiee@PjVwr<3Q_^g*ZA7q(sm
zZ45J>>H@j~w&pAQ>0#NFKv45qt)c{wFoj^Bou$l|;_<%owOjNfny%p{D*R@)LTyoR
z3*qw8Woq;KxsPVgc`^;UbmZ?r(bY|VlY!R&cGB<>**MD9dKeHD%05lxFDX`qLR*`?al)?m>*|PLY?3Ah
zDPa5KrBdvmMgeQ&XvC*%dHg&e5#lTa>9z2x2Jy1HC*#u&5H?}!TUBdyUP8Ev^}}MB
z2PNj*rN#8Mp?J0XF+iBXR|8ixF~{$*#r1hxd*29?Lb>{WZ%+9Nh@_cd4sCzJLp_m`43b>1|YAzv==IU)rJg?4B@oeKy;~K_r
z#*P)uwF`!hjY5DeSZ;!Ekg*|}-r-)XhQ4}?OQJnUvqTD#{l<`gPI8#KQ8lqw)TMUY
z1#xw^lKU`&8okT6RtF#x{8>S_w2C1TOs}TM&zGXD=uBD0!}uc9aIZ-eopRrZtaI+W
zSi+_?IdhEpY8uq57LM#{!tcpxIvD4u9Y5ky3Ucz5H8)pORFvC#sv5em?o?_PnTZYS
zIr5RBDtR=+ruzLGiKvU1QY)>qN<#%9QxTSm$=(C)?&^7O3<1Mjs#*a-aKoGBVPO{#
zXisRftnkv?10D)JA2?_idGnYX9!hVX{vm#5uAY3K>_YKC%xR`#)L$wUbfSTa6x8&W
z#K&z{RdfZi1Kkzpy=7I%YGdl-s_9?E
z{ulr2VquZ67@GWtgZ^zTFLxiDe>l6cql+K*z9eZn)l|b9yHH;oBRn1}l1QZ_n*~#@
zoBA79t^pUQ@EmDI4N0k~ee&GGw*d}yJHYY{vy##!H~D3)6)#V0h^$bp6<};r7rWNjCRngMG3KiP5U%k1KhMHTh|{Y&VPH4WjCMy7P3E(PRnkU
zw^3D3m42|&dKdkjBE0;e{uG_3r6HMO`EnP{JjYa2c!l&|j&~8lbWVwg)r0}i#|JLx~eSupilag{VxUM?ihj70Bw8^^vt^SBen=}ITgOI(9=|Xk_pXvYwbCQxLvvZ8iJ01wn&R=>SgBLv#Gr>)1C(*
zvO3hRl(o2Fh%iM4xV5i1R3$q(?-TGSIw&=|lbYMZazLEdVxsZ(YHm-)bTA>K35`LztnpCV?WOBdy!Fg!oBz*a%LdB>cbYGBDI0;#zT}*vnMw2_wVy_7Ji*SCq5fS@rnUn%pz$k3m
zjY+MZ-iKDH=-neIHE%fE9g2L>U9?F@HH??i30YIalfTWP;dvoUzQmRTXrMHMJ1t1ZnEzquH41
z1L7X1K;6t?#)z-LTK9xk-t^O%sxYp|#6k&}r7jC1F10wC&7bZ{mwvK
zA4r_1HA3u~MoY8QMe&M*hTaBy{xiWJvV&aLH;Cb;mZ925wgelc3q>llyeBS|6>ydb
zM^^1I_g3bW1&v>sXLi>|B@E0(gQlEXaZq?V7{NWQCaw$2eQ;1LC0*meNKd+OkHIUWe?B-ay8D3S>?(xHnOK3uc2`|L^sVHI=w~Dj
zfxKa&mCg@F8~B?+JDmbAGU{JxU{lqXD)fEBD1TB?iYkuIS-J;?6e3QRfz)gfkBRYn
zkc0B&kC%S~d_EcmR&7oO&k&5C^j=i<8g`ezy)5piLg=30+=WpSFwZ&u9{g3F@Jm1V
zE7{Q1FMXGdcp7+7)A&?jqVZzwQMh;Bta~1!5}waUnGhvsR%TKoDkjRAh$7SkBKU@r8;z$VDa)
zVzy)kSO_fPw_WRM2@#)lBDn;jAI1ATu=*IP)^`JB$SRu+x|`O0DbMo~0#@OD%lpPN
z$72SVeEz{cUW&B6lfzy^=i-+NpkJ=58MJoFHcw=YH0yJc`QyQDD!ui
zcrggeErVY&0rU0ZS|v08%F649KNr_K*^e6zM~&t?T4TU^!zWyvh(8#vZP;ty^kIPh
zfH7wuj=*qXS~xb?stPJmsA!$SEfnElDFrgpsOYv2W~gYQr#v%CNK88_
zy6@BkSJrk>Xv2Be?_osZYw#K1c2a%6GkV&kPbsadfX|fb!w;w@q2gVUdbcQ6
zf?y#B(<3RrTcM+onf=K~IRU^a%=*yZ=^oWrHUd5Uvtf@vIIv-ljR#1Vi1I7h!5?g&J)?Y+HF;Ce%;PX=5Oo63)M)=5nJHR13Wih<;X4+%$RsiwpON1
zFrLSq6idF8o!D8F`E#U8eMbI7>mf7%b#EQ!HmO2?xN&`d!nTIh&=QzD1`RmKVwlMZ
z3`)M=CDAzF)Al$lk@Ud9RD3QLxZ%17gcmaCE%e%pyDvmY`#o~YE27#1R{koiQIcGA
ziA2HPr@)5yVA%V*KAEoTUT@oAtH`&VM9!qmtR1)
zr;O>4e1SYGPCbN0!K=cqC$x5~?PvvbC$jaG+3dJ>5LohFxGT6mFdI?nmkiA#v2I%`
zH-y-Y9oj9;9JgA}Z_^s_s65f<_g@K9BR^Ok$Zc$EupRe2UCe5%y!^X7lOr4C^71;N
z!XyY7gydBe4BXO^`>5O9M`XmK-5T#CsT6a5x8HUrKM^?aeW&WgN~3~o38hZ@fO9=X
z?r=JB3M`p69t#8x=F!NNN1kaxwYGp
z+z;FbH`-{fku5GO6nkPz=;e-Onr}+oCH4=iJV#sHCdKJD=|2}^gB|M`tgcst@qXHC
zEpujU`;b_b3u$H$r-!T?})x0^Ubj3$F@B?pHFYcL_CKLYan_Y<{5=qp-~T
zg`()JJVLiMJR9{X6|nH$|F~HkCAe_}v$n9b2r%J1)5h+an^flYrn1PmeQ)#n$);9u
zFeBufY!z=O^~ZF&Z~o$umDSTyFkBKxL0sp=^mQIACruMZ$Vk{?9AS=<$PW=E8Ngkn
zN;lLlQ%s9!Aphb+k`leQ5T&hLXb%)0w5|&Zy=3~BCpVCdFA>Y<^q6+fSVyxIdZD&K
z;nqm~&tLgv(pYE#;quRTk%C0G3QLnzxuxu#@%n`8vB+f179?^hI
zZN}s6o)%RfwP&S&*EBL3NcCo|98PqBqxScVfcDHkFv
z2!A{adz`{Wy30Xb42$?&827UOUxA3f)9%fM`D1iK>R!?wd|q*LojPZe5h*)@LpStkED0dM-!vJ!tSe
zS+p%~Lck+?VXZp--yUXrmwNh-wVOCx^#2
zL?%0!4}Ch9=@<`u+o)?`4lgvR_6G|NKBc(450N^K)^l?*zF-MVdNU{L3uq#JBHFRb
zswtNqZ!V*^EXaq8@Q-3cK-o+LuJ28`Mcs!&Rc+6;TXx>};5X4^Nu90@(c)ZQ&u(Tw
zZblD@<}yUFlYhlIrHJ~uh!3$%v4C_(mAS>wbslOEbvCCM*M77DJUXCl#j`a{256`b
zDH)7^-92F=EiX%tbDQl2KN7+0!pdvUa5+k8`bs~Pu8mtd8swjhBb*A*{1mlWN
z8Fr;(>Y4U__XU^QxQ)?K-RAmPNCv;Qx>O2J2R>KSQKUo$#}%zNPDIm)a2DOq81ur!
zQN3ZQH7iyA;JRCY-}?kLCAB{FY&-*Hud{K>-7Jvhn{rov+1qd}^B9-_(jnZJBY)X9
z&+YELFkjDwb_QY6gc&-wJ>REBv0LpgtIhoVX6!e|jm^*DbS)%^<
zRCg14{x0S1d}w#g2F;L!)j%q?
z^6$3&d~K~?BWV!YuLXhJIByl3o^QS-dal^aS)%D_qH)YuGN~|GC=^Ts`MvsL;;u$1
ztX;SN+k*OAVj0lqf|3B5Q37`gi(`=%&ut$zmRU}}$2~vZ8}6b)IM$T$@JydR9ajyR
zHf|_Qqf>eXU?IB9Xvz2vnm^0c3LY)PD@p5CWKU+_f~ZC-AH@d9
z$E#SR{C*sC46YhorAQ~7#sr_@OvyTqa-`=Wg`(=KUz!V1j@doNu><8YMwg3tgr^<}(#<}V!XeAu8j`8}IKT8Vd%!UjuaqGN(BC;IBv#Wb7vwov_DnfleoAcb
zIU8h?I^x`SD#k5tXZC8ZpKEx{F-L4?>ay#@q$3on@B;cmFClg)LJ?6qzpSq5TV!f+
zoSd%fBH~SV1%W}L1AH{dg9h0+^q7c2w=%frby0#c@C@&y@dIiH3R@4uBLk)Z
z7U=uO-1Ko%?T$`pX)1UtmpO4LA}W~N7&HWW=Lp23*#lK9qP
zS+N1FybV#pgm@Y|iS!`RHUf}g%3IJkqb9foO`h_*Uz*(~l$~oifsi2Rs9=*aCs6lg
zXK$ky`MmC8`#>6fWnp3Gxo>lh`(~0$49~uy;_Iy0#%Yt-bqRj#2sP?a<%ChUHXu&P
zy<^
z>AP_fEXQMyW!iXomPF%f=jrKxq7Q781%}RVd=8l4q40Q(&3yKyJ0c5H7AN2{6LGMs
zOOA=TeCKPrT_tlSmzyqtm`R&9MX9f|^HLn}z5BKa8Fy~7NY`8atKxubU(
g|2v7r`(ONDsrs*Hukx??Z%VZSmHJr+p>hWP2b#$-p#T5?
diff --git a/figures/subsampling.svg b/figures/subsampling.svg
index aaebcec..c6073fb 100644
--- a/figures/subsampling.svg
+++ b/figures/subsampling.svg
@@ -1,7 +1,7 @@
-
diff --git a/index.Rmd b/index.Rmd
index cf2940b..9f911cd 100644
--- a/index.Rmd
+++ b/index.Rmd
@@ -28,6 +28,7 @@ opts_chunk$set(echo = FALSE,
# Summary
+- The MetabarSchool Package
- What do the reading numbers per PCR mean?
- Rarefaction vs. relative frequencies
- alpha diversity metrics
@@ -35,6 +36,28 @@ opts_chunk$set(echo = FALSE,
- multidimentionnal analysis
- comparison between datasets
+# The MetabarSchool Package
+
+## Instaling the package
+
+You need the *devtools* package
+
+```{r eval=FALSE, echo=TRUE}
+install.packages("devtools",dependencies = TRUE)
+```
+
+Then you can install *MetabarSchool*
+
+```{r eval=FALSE, echo=TRUE}
+devtools::install_git("https://git.metabarcoding.org/MetabarcodingSchool/biodiversity-metrics.git")
+```
+
+You will also need the *vegan* package
+
+```{r eval=FALSE, echo=TRUE}
+install.packages("vegan",dependencies = TRUE)
+```
+
# The dataset
## The mock community {.flexbox .vcenter .smaller}
@@ -68,6 +91,8 @@ data("positive.samples")
## Loading data
```{r echo=TRUE}
+library(MetabarSchool)
+
data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -94,6 +119,8 @@ positive.count[1:5,1:5]
## Loading data
```{r echo=TRUE}
+library(MetabarSchool)
+
data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -119,6 +146,8 @@ head(positive.samples,n=3)
## Loading data
```{r echo=TRUE}
+library(MetabarSchool)
+
data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -169,7 +198,7 @@ positive.motus = positive.motus[are.not.singleton,]
$`r nrow(positive.count)` \; PCRs \; \times \; `r ncol(positive.count)` \; MOTUs$
matrix
-## Not all the PCR have the number of reads {.flexbox .vcenter}
+## Not all the PCR have the same number of reads {.flexbox .vcenter}
Despite all standardization efforts
@@ -240,13 +269,19 @@ positive.count.rarefied = rrarefy(positive.count,2000)
## Rarefying read count (2) {.flexbox .vcenter}
-```{r fig.height=3}
+```{r fig.height=4}
par(mfrow=c(1,2),bg=NA)
hist(log10(colSums(positive.count)+1),
main = "Not rarefied",
+ xlim = c(0,6),
+ ylim = c(0,2300),
+ breaks = 30,
xlab = TeX("$\\log_{10}(reads per MOTUs)$"))
hist(log10(colSums(positive.count.rarefied)+1),
main = "Rarefied data",
+ xlim = c(0,6),
+ ylim = c(0,2300),
+ breaks = 30,
xlab = TeX("$\\log_{10}(reads per MOTUs)$"))
```
@@ -325,11 +360,11 @@ knitr::include_graphics("figures/diversity.svg")
@Whittaker:10:00
-- $\alpha-diversity$ : Mean diversity per site ($species/site$)
+- $\alpha\text{-diversity}$ : Mean diversity per site ($species/site$)
-- $\gamma-diversity$ : Regional biodiversity ($species/region$)
+- $\gamma\text{-diversity}$ : Regional biodiversity ($species/region$)
-- $\beta-diversity$ : $\beta = \frac{\gamma}{\alpha}$ ($site$)
+- $\beta\text{-diversity}$ : $\beta = \frac{\gamma}{\alpha}$ ($sites/region$)
@@ -410,25 +445,19 @@ kable(data.frame(`Gini-Simpson`=GS),
kable_styling(position = "center")
```
-## Shanon entropy {.smaller}
+## Shannon entropy {.smaller}
-
-Shanon entropy is based on information theory.
+Shannon entropy is based on information theory:
-Let $X$ be a uniformly distributed random variable with values in $A$
+
+$H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}$
+
-$$
-H(X) = \log|A|
-$$
-
-
-
+if $A$ is a community where every species are equally represented then
$$
-H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}
+H(A) = \log|A|
$$
-
-
```{r out.width = "400px"}
@@ -441,7 +470,7 @@ H = - rowSums(environments * log(environments),na.rm = TRUE)
```
```{r}
-kable(data.frame(`Shanon index`=H),
+kable(data.frame(`Shannon index`=H),
format="html",
align = 'rr') %>%
kable_styling(position = "center")
@@ -452,12 +481,12 @@ kable(data.frame(`Shanon index`=H),
As :
$$
-H(X) = \log|A| \;\Rightarrow\; ^1D = e^{H(X)}
+H(A) = \log|A| \;\Rightarrow\; ^1D = e^{H(A)}
$$
-where $^1D$ is the theoretical number of species in a evenly distributed community that would have the same Shanon's entropy than ours.
+where $^1D$ is the theoretical number of species in a evenly distributed community that would have the same Shannon's entropy than ours.
@@ -548,10 +577,10 @@ exp_q = function(x,q=1) {
}
```
-## Generalised Shanon entropy
+## Generalised Shannon entropy
$$
-^qH = - \sum_{i=1}^S pi \times ^q\log pi
+^qH = - \sum_{i=1}^S p_i \; ^q\log p_i
$$
```{r echo=TRUE, eval=FALSE}
@@ -616,7 +645,7 @@ abline(v=c(0,1,2),lty=2,col=4:6)
- $^0H(X) = S - 1$ : the richness minus one.
-- $^1H(X) = H^{\prime}$ : the Shanon's entropy.
+- $^1H(X) = H^{\prime}$ : the Shannon's entropy.
- $^2H(X) = 1 - \lambda$ : Gini-Simpson's index.
@@ -1074,15 +1103,15 @@ BC_{jk}=\frac{\sum _{i=1}^{p}(N_{ij} - min(N_{ij},N_{ik}) + (N_{ik} - min(N_{ij}
$$
$$
-BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}}
+BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}}
$$
$$
-BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{1+1}
+BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{1+1}
$$
$$
-BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}]N_{ij} - N_{ik}|
+BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}|N_{ij} - N_{ik}|
$$
## Principale coordinate analysis (1) {.flexbox .vcenter}
@@ -1109,7 +1138,7 @@ plot(guiana.bc.pcoa$points[,1:2],
xlab="Axis 1",
ylab="Axis 2",
main = "Bray Curtis on Rel. Freqs")
-plot(guiana.euc.pcoa$points[,1:2],
+plot(guiana.euc.pcoa$points[,1],-guiana.euc.pcoa$points[,2],
col = samples.type,
asp = 1,
xlab="Axis 1",
@@ -1123,7 +1152,7 @@ plot(guiana.jac.1.pcoa$points[,1:2],
xlab="Axis 1",
ylab="Axis 2",
main = "Jaccard on presence (0.1%)")
-plot(guiana.jac.10.pcoa$points[,1:2],
+plot(-guiana.jac.10.pcoa$points[,1],guiana.jac.10.pcoa$points[,2],
col = samples.type,
asp = 1,
xlab="Axis 1",
@@ -1162,6 +1191,58 @@ plot(0,type='n',axes=FALSE,ann=FALSE)
legend("topleft",legend = levels(samples.type),fill = 1:4,cex=1.2)
```
+
+
## Comparing diversity of the environments
```{r}
diff --git a/index.html b/index.html
index eeb275f..078b191 100644
--- a/index.html
+++ b/index.html
@@ -4,7 +4,6 @@
Biodiversity metrics and metabarcoding
-
@@ -82,6 +81,10 @@
transition: opacity 0.6s ease-in 0.4s;
opacity: 0;
}
+/* https://github.com/ropensci/plotly/pull/524#issuecomment-468142578 */
+slide:not(.current) .plotly.html-widget{
+ display: block;
+}
@@ -106,14 +109,31 @@
Summary
+- The MetabarSchool Package
- What do the reading numbers per PCR mean?
-- Rarefaction vs. relative frequencies
+- Rarefaction vs. relative frequencies
- alpha diversity metrics
- beta diversity metrics
- multidimentionnal analysis
- comparison between datasets
+The MetabarSchool Package
+
+Instaling the package
+
+You need the devtools package
+
+install.packages("devtools",dependencies = TRUE)
+
+Then you can install MetabarSchool
+
+devtools::install_git("https://git.metabarcoding.org/MetabarcodingSchool/biodiversity-metrics.git")
+
+You will also need the vegan package
+
+install.packages("vegan",dependencies = TRUE)
+
The dataset
The mock community
The experiment
Loading data
-data("positive.count")
+library(MetabarSchool)
+
+data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -880,7 +905,9 @@ sample.TM_POS_d16_2_a_A1
Loading data
-data("positive.count")
+library(MetabarSchool)
+
+data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -992,7 +1019,9 @@ sample.TM_POS_d16_1_b_A2
Loading data
-data("positive.count")
+library(MetabarSchool)
+
+data("positive.count")
data("positive.samples")
data("positive.motus")
@@ -1212,11 +1241,11 @@ positive.motus = positive.motus[are.not.singleton,]
positive.count is now a \(192 \; PCRs \; \times \; 5579 \; MOTUs\) matrix
-Not all the PCR have the number of reads
+Not all the PCR have the same number of reads
Despite all standardization efforts
-
+
Is it related to the amount of DNA in the extract ?
@@ -1227,7 +1256,7 @@ positive.motus = positive.motus[are.not.singleton,]
boxplot(rowSums(positive.count) ~ positive.samples$dilution,log="y")
abline(h = median(rowSums(positive.count)),lw=2,col="red",lty=2)
-
+
@@ -1268,7 +1297,7 @@ Only 7.4% of the PCR read count variation is explain by dilution
Rarefying read count (2)
-
+
Rarefying read count (3)
@@ -1284,16 +1313,16 @@ are.still.present[1:5]
## are.still.present
## FALSE TRUE
-## 1942 3637
+## 1886 3693
Rarefying read count (4)
par(bg=NA)
boxplot(colSums(positive.count) ~ are.still.present, log="y")
-
+
-The MOTUs removed by rarefaction were at most occurring 21 times
+The MOTUs removed by rarefaction were at most occurring 13 times
The MOTUs kept by rarefaction were at least occurring 2 times
@@ -1306,7 +1335,7 @@ positive.motus.rare = positive.motus[are.still.present,]
-positive.motus.rare is now a \(192 \; PCRs \; \times \; 3637 \; MOTUs\)
+positive.motus.rare is now a \(192 \; PCRs \; \times \; 3693 \; MOTUs\)
@@ -1340,9 +1369,9 @@ positive.motus.rare is now a \(192 \; PCRs \; \times \; 3637 \; MOTUs\)
Whittaker (2010)
-\(\alpha-diversity\) : Mean diversity per site (\(species/site\))
-\(\gamma-diversity\) : Regional biodiversity (\(species/region\))
-\(\beta-diversity\) : \(\beta = \frac{\gamma}{\alpha}\) (\(site\))
+\(\alpha\text{-diversity}\) : Mean diversity per site (\(species/site\))
+\(\gamma\text{-diversity}\) : Regional biodiversity (\(species/region\))
+\(\beta\text{-diversity}\) : \(\beta = \frac{\gamma}{\alpha}\) (\(sites/region\))
\(\alpha\)-diversity
@@ -1583,10 +1612,10 @@ Environment.2
-Gini-Simpson's index
+Gini-Simpson’s index
-
The Simpson's index is the probability of having the same species twice when you randomly select two specimens.
+The Simpson’s index is the probability of having the same species twice when you randomly select two specimens.
\[
@@ -1597,7 +1626,7 @@ Environment.2
\(\lambda\) decrease when complexity of your ecosystem increase.
-
Gini-Simpson's index defined as \(1-\lambda\) increase with diversity
+
Gini-Simpson’s index defined as \(1-\lambda\) increase with diversity

@@ -1663,24 +1692,20 @@ Environment.2
-
Shanon entropy
+Shannon entropy
-
-
Shanon entropy is based on information theory.
+
Shannon entropy is based on information theory:
-
Let \(X\) be a uniformly distributed random variable with values in \(A\)
+
-\[
-H(X) = \log|A|
+\(H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}\)
+
+
+
+
if \(A\) is a community where every species are equally represented then \[
+H(A) = \log|A|
\]
-
-
-
-
\[
-H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}
-\]
-
@@ -1701,7 +1726,7 @@ H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}
-Shanon.index
+Shannon.index
|
@@ -1747,15 +1772,15 @@ Environment.2
-Hill's number
+Hill’s number
As : \[
-H(X) = \log|A| \;\Rightarrow\; ^1D = e^{H(X)}
+H(A) = \log|A| \;\Rightarrow\; ^1D = e^{H(A)}
\]
-
where \(^1D\) is the theoretical number of species in a evenly distributed community that would have the same Shanon's entropy than ours.
+where \(^1D\) is the theoretical number of species in a evenly distributed community that would have the same Shannon’s entropy than ours.
@@ -1851,7 +1876,7 @@ Environment.2
Impact of \(q\) on the log_q function
-
+
And its inverse function
@@ -1871,17 +1896,17 @@ Environment.2
(1 + (1-q)*x)^(1/(1-q))
}
-Generalised Shanon entropy
+Generalised Shannon entropy
\[
-^qH = - \sum_{i=1}^S pi \times ^q\log pi
+^qH = - \sum_{i=1}^S p_i \; ^q\log p_i
\]
H_q = function(x,q=1) {
sum(x * log_q(1/x,q),na.rm = TRUE)
}
-and generalized the previously presented Hill's number
+and generalized the previously presented Hill’s number
\[
^qD=^qe^{^qH}
@@ -1908,22 +1933,22 @@ qs = seq(from=0,to=3,by=0.1)
environments.hq = apply(environments,MARGIN = 1,H_spectrum,q=qs)
environments.dq = apply(environments,MARGIN = 1,D_spectrum,q=qs)
-

+
Generalized entropy \(vs\) \(\alpha\)-diversity indices
\(^0H(X) = S - 1\) : the richness minus one.
-\(^1H(X) = H^{\prime}\) : the Shanon's entropy.
-\(^2H(X) = 1 - \lambda\) : Gini-Simpson's index.
+\(^1H(X) = H^{\prime}\) : the Shannon’s entropy.
+\(^2H(X) = 1 - \lambda\) : Gini-Simpson’s index.
-When computing the exponential of entropy : Hill's number
+When computing the exponential of entropy : Hill’s number
\(^0D(X) = S\) : The richness.
\(^1D(X) = e^{H^{\prime}}\) : The number of species in an even community having the same \(H^{\prime}\).
-\(^2D(X) = 1 / \lambda\) : The number of species in an even community having the same Gini-Simpson's index.
+\(^2D(X) = 1 / \lambda\) : The number of species in an even community having the same Gini-Simpson’s index.
@@ -1941,7 +1966,7 @@ environments.dq = apply(environments,MARGIN = 1,D_spectrum,q=qs)
H.mock = H_spectrum(plants.16$dilution,qs)
D.mock = D_spectrum(plants.16$dilution,qs)
-
+
Biodiversity spectrum and metabarcoding (1)
@@ -1950,11 +1975,11 @@ D.mock = D_spectrum(plants.16$dilution,qs)
FUN = H_spectrum,
q=qs)
-
+
Biodiversity spectrum and metabarcoding (2)
-
+
Biodiversity spectrum and metabarcoding (3)
@@ -1963,7 +1988,7 @@ D.mock = D_spectrum(plants.16$dilution,qs)
FUN = D_spectrum,
q=qs)
-
+
Impact of data cleaning on \(\alpha\)-diversity (1)
@@ -1999,7 +2024,7 @@ positive.clean.H = apply(positive.clean.count.relfreq,
FUN = H_spectrum,
q=qs)
-
+
Impact of data cleaning on \(\alpha\)-diversity (3)
@@ -2008,7 +2033,7 @@ positive.clean.H = apply(positive.clean.count.relfreq,
FUN = D_spectrum,
q=qs)
-
+
\(\beta\)-diversity
@@ -2102,7 +2127,7 @@ d_c =& \max\limits_{1\leqslant i \leqslant n}\left|a_i - b_i\right| \\
\end{align}
\]
-For the fun… ;-)
+For the fun… ;-)
You can generalize those distances as a norm of order \(k\)
@@ -2140,7 +2165,7 @@ d(x,z)\leq \max(d(x,y),d(y,z))
A metric induce a metric space
In a metric space rotations are isometries
This means that rotations are not changing distances between objects
-Multidimensional scaling (PCA, PCoA, CoA…) are rotations
+Multidimensional scaling (PCA, PCoA, CoA…) are rotations
The data set
@@ -2167,7 +2192,7 @@ data("guiana.samples")
s = tag_bad_pcr(guiana.samples$sample,guiana.count)
-
+
guiana.count.clean = guiana.count[s$keep,]
guiana.samples.clean = guiana.samples[s$keep,]
@@ -2182,7 +2207,7 @@ guiana.samples.clean = guiana.samples[s$keep,]
s = tag_bad_pcr(guiana.samples.clean$sample,guiana.count.clean)
-
+
guiana.count.clean = guiana.count.clean[s$keep,]
guiana.samples.clean = guiana.samples.clean[s$keep,]
@@ -2197,7 +2222,7 @@ guiana.samples.clean = guiana.samples.clean[s$keep,]
s = tag_bad_pcr(guiana.samples.clean$sample,guiana.count.clean)
-
+
guiana.count.clean = guiana.count.clean[s$keep,]
guiana.samples.clean = guiana.samples.clean[s$keep,]
@@ -2264,7 +2289,7 @@ xy = xy[,1:2]
xy.hellinger = decostand(xy,method = "hellinger")
-

+
@@ -2284,15 +2309,15 @@ BC_{jk}=\frac{\sum _{i=1}^{p}(N_{ij} - min(N_{ij},N_{ik}) + (N_{ik} - min(N_{ij}
\]
\[
-BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}}
+BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}}
\]
\[
-BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{1+1}
+BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{1+1}
\]
\[
-BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}]N_{ij} - N_{ik}|
+BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}|N_{ij} - N_{ik}|
\]
Principale coordinate analysis (1)
@@ -2305,17 +2330,36 @@ guiana.jac.50.pcoa = cmdscale(guiana.jac.50.dist,k=3,eig = TRUE)
Principale coordinate analysis (2)
-
+
Principale composante analysis
guiana.hellinger.pca = prcomp(guiana.hellinger.final,center = TRUE, scale. = FALSE)
-
+
+
+
Comparing diversity of the environments
-
+
Bibliography