From 050956c01b713e5f3d2f6d686a57df861b73bacd Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sun, 3 Nov 2019 13:12:58 -0500 Subject: [PATCH] Some corrections on the lecture --- NAMESPACE | 1 + R/norme.R | 16 +++ figures/dist_hellinger.afdesign | Bin 20272 -> 20470 bytes figures/diversity.afdesign | Bin 37400 -> 37532 bytes figures/subsampling.svg | 22 ++-- index.Rmd | 139 ++++++++++++++++++++------ index.html | 172 ++++++++++++++++++++------------ 7 files changed, 246 insertions(+), 104 deletions(-) create mode 100644 R/norme.R diff --git a/NAMESPACE b/NAMESPACE index 835f9f3..1d0f808 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,5 +7,6 @@ export(H_spectrum) export(exp_q) export(log_q) export(mode) +export(norm) export(tag_bad_pcr) importFrom(Rdpack,reprompt) diff --git a/R/norme.R b/R/norme.R new file mode 100644 index 0000000..5d45a23 --- /dev/null +++ b/R/norme.R @@ -0,0 +1,16 @@ +#' @export +norm <- function(data,l=2) { + no <- function(x,y) sum(abs(data[x,]-data[y,])^l)^(1/l) + n = nrow(data) + d = matrix(0,nrow = n,ncol = n) + for (i in 1:n) + for (j in i:n) { + d[i,j] <- no(i,j) + d[j,i] <- d[i,j] + } + + rownames(d) = rownames(data) + colnames(d) = rownames(data) + + as.dist(d) +} diff --git a/figures/dist_hellinger.afdesign b/figures/dist_hellinger.afdesign index fb6838d202abc4251787f489ff496e8307cbb7c9..ac8d3512815547ec68382d9ff9eb0a4fbfdfd184 100644 GIT binary patch delta 4436 zcmZ`-WmJ?4*B!bWX{02CfuTbjIwhr>85(Isnt3RJA*4}CK#7Y=r!**ybayw1Gz<*i zz3+YBpWj|X}*5(!p*T`wRtCBUlD1(3Lmbj zub5sI1{Ad9f-Gy1*6~kazqYP%z&yO0SQY|#8!3Z^U26l(c>Q}8y*sR{2Z$QkGP@)j zi)4L-Ve`aN#Y%;BJ7Ni{_|B?8H$_Y=lah@=ME2#-?5LLL++o#9Hxs;jmefsw;vpQ`zR{=>5ZNr`2rGt{m%;H#a(9G!a9N6d@g zvDuUyQ{&@TJnqUFj$2ntdJ_y#%AF_?K5x7&xQ4k-)$Ek}C7KYXLNQ<3twqOQ@|4kP zN)2r8!9fP1w*_7+Q7bt$u15lYe&@-0J6sn_|8(PT4@RTr7JhE;G=QZhph88UdNM;Dt}FMJ|`RM{Bf=+F_CeSXmbQR+> z^6scDmDy2&*_hFZ6EeRRhv5i7(epE zY`9~YNp?2O-wm19Xi$eVTP?N$nu}-WihjB6Xq7+931<&?%17myED86v5_jc;W<2YH zCg@~Ta}zh>rqwH|V+AjQ#X`_`%yeBD2s|m!DGRU}W+D*|B7Jc#?bEXLzxg4(UO$~lNY8&ZH4$E|{HA{EI((kG6jd~EmSTqlWNHs|3skLHl7BKY9G0X-wZMOmJqP;xrT zfy%bl^Oim|+JS#HBjl{3#@rCUt@#l)A3Kk+)c+Cgi%fT8E^zz|@+5Msesh@OWl`s3 zztI}hacGlya?H|pJO5|qGF~K2lZkgqB9bNe`gA)8*p8R#y5II}=VQ2Qs7CH|d~s+& zzTUqJ!F05e0nSrn)>X8soF?m?u4>kAulX4)BNdIGN9(YeT{69E1M2)n8iuXNlz4Wf zf(!Q-{q%eGmfoZ`>YRSN-tf5UP%kgi)ze@*zj=QZvJ)NAfXqWQ-uE4={~%Qnr(geMWOu6`Ot%6ORW>EF?Z;uNuOB=QhVEM%%F2kA7totz z?tKj*o;S@o{#Wx;D1GA<39FyI4;3bPH)lGucf~ht-;0WNA|!du1`0BUhxpjeF{z72 z26{(1vzfk|#TJc@*xbhukh599dCE{@E(NsgrdJDgIu1%?FINd(j{5|(PrXGuKFrbAWDx|Dw88jIcU2>B?ol(`?%u;U!(N19u?>P_?XPB@amY+bP&JB ziL_MA-p^WqU+8^`VPtpK?S_&D+-uKt?^upBNTGYSX4+ZwieR zkW?wjA)hRS3FY5BAN8sTk5^+%s`vrCvngfn9_Li1S?Dj+pvdjnT|F6c>lLV+bw@?J zL!*Dp+>OOmR&CWh^D4(9(Xb_;=cH2%?-dTGtcw@@R@gsKY#*N-;-ELhQB|Cr#_+RS zWD!o{1YHT|YAH8^uhVHh391}V6*tJNmaW#WQMnZ?-q5xdBwHsX#gbuSH-iG$lT_0Z zm^J$}j0Uk~=8||}dqx~`)Nc+7r}&0(G%Byi>IxmW-KZO050n>_6#neWR>&&I{L=RP z3(2O>l$~CWNz<2O2+cLYvZ`iPWyt1-1el64?q|#ad&Wn)P(j|9c}8n@IY<3OEfN?UYx%jBi3hG0i&2R|>fDeDFhFc|(dk%^4qSdL zw0tCWw=Wi?j4pZbLN0o9{9qJmh;`4iuZHI=DZ>tcTP5~d+lKeBhUu6AY`s1OzggK{ zm5m@9tTlT3&1hwK~Gtz`jw}GlM3)mGM zZQ8G>G;u5xws5al%?PlK+096?g{~KZ2Kf(GLUwYEq{;8%z5!S2K0An7Qvp7#-XY<> zL2<0Fy53H>vtz&81=>*wj_y<nCtGYBVvzv#91`Uy+#$^^V@XW#MV^P z$EY2vWp3z6!ZV6&o15d~(T$+QMd&d3^+GT`OLMp1Bx>@)R<`-(Eo09Px+?EjS;JXO zKQ95{|Hnwf3fOGvdB`pEL;h~LLK~BjFp=NT35#)wwpL`+(a>%-mXAVMGv%EU5@2m4 z%v~~KGNQ2J*D4BqKHCes_I>t!xo%Dxfon*AkJ{@)dmZg0tT4f~El)x#0W*qgTChd} zxgS@$J!E{b6lSFoy$Ge8upg@R{v0bnwK}mS(B-%WV7!7y!j)vgn8EeyW2adxxU}mO zC+9?_Ig+)-?|-l$x#@ch;{&D+wCX?UV7||YJLr!rW@wFI@W1Q{b(q&MPY5CA>n?gz zQ#5L2GM=`Tl(lu@z{(O~%3ZC#uE}Lk*{W-?pp1|k49hie2jg_ZQB;Cic9{r~x>`rV(*TL{iZ zdovH23AerBOlc`&bTDR#^L_D+yEB$+UJ_Uom7hriS;iTV$gz6CXj!MRG3^7K<>*W+ zSrHqai9|1~hmEj0v8B6i*}7EP%gI@5@ebdmgZrq7rLXDdc=XBrRVr7U7GVr7m>8Zk zW+PARtzMQU-lX3iYPTg-ryvGS-#>l8NAPxpaXaGv!IlaXek%KsxEB|6;@`-TX$N3q zO0DlobPe?@g-QkBuz8^Jq;aP7%CSCvHz33$QT0@i#$kv9XUKBzlBbpob?wZdHcOO(ThSH`;^a0-f8M3>1rj9)a|^euZT#^FFd^H zVv6?CH8X8_7ztpY?gmrXxmp24tP7xS%|GbmdB(sLC<-?36l1N$NAvUuK7vI5z|xr= zG~?C&QXr!=h#q0FW;xA3)T@)j=#bUUrekzOefx!;#&}tHoi9p0i-O42ePBzxf5DyN z#FLbN;6||fLBOe^&QH_d(rBI7IkqM7_edV1D0)bvWqH)o46j^3@?_zKwSWe7;*M*I z!;5C?Po3%sPWWTadLm55#<^Lh@OB`d(ep*I?7*R5i_P01x?o(rn^!8ocp8qO9<9>M zW0|4XF)JmjD$dZ86|71}b2GAW3IeN}j05g};{YJEVvO2_N^dD+~Hu`$Q@G z5r_X_5*WnKuk~|$Q^!blz6t0tqNmsMrHYeXxwYBg*B&FumItKPmxhk2j}#Wi)}P0y zGHUUPon2^sglHEzv{_?QcHdNgw+RgYb#Z1$Ri}8=+j@D{yKD&VxK*Zjcq6olZk-`* zvAVddImLs-jj|~LVd)aF!Rcb|(V_S0V--P1J4?$VR&9I}`@yv_vVigCL*`8XE+nX; z1wAM6-ts~kf_qyL|M(I;)6*-^vVHM_Fkf^F;)C!&zgY|T6nvyF+kTdO)k?d3zcBkn zo4xJ(o`#&%eu(qK>WAQG6f6Ae8nG9@l+m!NkkasC0jO1hMHeOy|@w5YceRb zQ(&_1RiUiDHJ@nwI80O6H(tiqKU1rdvr;7RTOGboqfscQICW>)YhF;knjldR8GO4p zQ&-I4A!&CqmG?E7N$bjh64G3V#9{x&lbASHk+8&8wSWob0>}$0v0N@8sj-7XaEOhN zQ*$9I*iY$Fd77S1O!iTBBaNp7iJqR{Do}PWY(LSlf<={AOA>bKOx0;iu$#5D{Bgh8 zF1IZ6TSQ>^hwlzj#TpwK2AwnUx?^-h%WB(S`!xHh&bYYz2KKL3?Q1RnaN>8ZyT zifg<;iO5ZQr$+jD-SbM9mIHn@)!%^`i1W^{%3PxA6%zax)SsgH3PieCO}_PR4G>M@ zIAgw`0amjy{Nb|@#$Dkb7ARtT@uY73UtM$HHy!MOr}0GN{T+4HSzpUzUF?x@a>6(= zrVu-Wh;B%@%{FFat5#3YXL6qHa(}+B$^`B5NDbI62L{sWtw;*H7Y(tfy0_g#Ci$=O zvu%;2h;guSo_La|86z6iuSimE_o=R6XS^>KP~*xc;gX2Y$`YQZh)X2xHs)q%ImkUY zbvZ31Zm~45yh(Gno7psHoK$MkF~<^2F=w!d?`C-AHcOps8CF~RUBIoaHbD7DV9DT? znIqTjj8YM-N(xp+5|NQ^Ix$}Dwot$A#+N;@Ef-fAc(I@H=p#*{qm(CjjD|{T57HVPIKxbZ^2W;(wJe5XcRtgv9=l?-Z(DIPoEKzIQX@+;@n$w5 z7~TZC0vN4f+B8iPI3gV29*q}Cph>H0}o{9VJlz<_xb0Nfs>m}O(gj@ng0Vv Ca%zzP delta 4195 zcmZ8lg;UfE+ua4cA`QaQQtE;tf(R@POGyZb#7c=EA+@wi{^*7UrMsn;5D8(WLFrsV zx&&eAkly#+dG9;l%y(v<=Q(HQ`~l}VC)wbtEU*%Ype}z41Ohp_x@hyLI$A+!{uY13 z=il2+{a^jREwXHA{0}GkceGuQZp86QP{8WFa?^ZHVz=bOp1&&aBRkK2j^XiE9&gnp zK2GV~bT-zdWppx=X&3hsD)JZU*(XP6>o(kNT{{pL_-cHQtzyp^#sncW;rh?%B*7eQyf$u{!!CS%`Tg>%G);HdFYEvBKD`%3C9i z$~EaNzL`>xGvU^V@0}|`KhH?pMV7CMasL3}Y=zoH)~|hVX4ghqV~VAQixC9edA1W6 z>qllK!ItM*^@^B=*{c2aoeZ<|SE++tMP|H#*j0Mr^x5EW_L=(1`RdGa`B=KZpklW> zi+ohe9!F*jl1Y+pRO-#3aIFqmsr|Iu6)I9$d)e2z*_XdFkT($(I8H)0kq z`)94vL|D`h*P&JThx(h}9(g{X*u1~HK)((&Tlij3F2q@lmAIfv@is6i7IRL*7^6(U8A>5vf^V*VlnTMK=)4KZ1NsMY!w&&^2;pF_?xq81Hj%NE=X2YlhPcr^OEPZUG7P#*IJJ(wC`vT7xV&xt9QZs8#R&p_*iG z%djKJ>3$x9Cr4m}-9$VH`{mru*TQ=NA|A_QYMs*p9l`0L7QvNwSHti$)0X0myB^F}v{z!{Mm0|h{vv=Gig@m@zv0J)iF42z*)uzG!bglXzv~P@_TTnPl8eg4 z*F=}Gxww0!Q?I^R&;_Ogy#Eet?YBs!S(*J+V43+A z$0`+^&geIk=FEzRB;dOe+OI^GILxhew4X5jrq3Xc?$YN(034G;)J zQQ>WWhQZ2?c3(Cee7+vhvS6UzpB;BI$5JCl9darKA3t6d()*~7Ne^7MV)zPxD9P4UObtcl{GC#6Oh%?f5JEhE-VkPYSh(+ZhGhzBq;D@W*NXY zZv5!Y5zdfHws%?Gw24)gMQ`(n$^|=xw3Rc%D??1fZG>0zh`-Vb!vpTaRmawRH5t#d zd+Y(fp3cmA0F4Zx5nY6}k>Ud7l&MxRxv;-Fo%IW|mJ%2&9f5AAyd!ehlMf2d27IM= z5icMETy;hGP4?ag{e^ESUz8MG!O9A&8ji{->SZatl_t?4guPA%mWfd(Y#gC)UK7H7 z4hj)~DSyhK5lwIs$~og++%IpZP-Ts)06JpKOIW)`m6YfvdkUheB|=8pey7gFDwsLX zouH+>s6JJm$FVVpOm9`Uh19^Y5;dSV`Y`ws1)p#i>&y3OJ@8 z)|{Fm?YBYTwfQRSmsm*d8^wTLBw#-3SHL*z+oWX^KBRqgD???dWU_dv8FDvrnIl3m z>FO)Wgt(HK=sqTOgW9VR3fZ>xa6X7fEL#n2e{r#yu3Uq0K2hFH{N2&TPA@v z7y}jIe)b=h>_vg6!EYVzzT-_$AroFMQW{x=CzjP5gTFExkshxAg4tH`Eek(ls)EzL zud7#Ij9NGfArza*qYda^={+#5x{fBH1(Q4YCO)#(1xs3Mw3`#GmPL+rfCCTskt2R9 z2w!P*dG$^>JZGUNSdwNB1``Bf9SWvLL6C{1fwACik0~m1SLOQD0ecEd{2q?O&++i_ zFMQI6xd4SqlO~mA;Al6w_>ow(m&J}~4~Lt*XiR+*sr@xNM={ug?SzhPf_a2lfy|SM z>ihbXy^mI&tOSPtU_1gCt@K&D1kQW9R$vN+8sYWVTkp4a(q@c zq?&`+XI83Dl{Rz`VqH-!Y_sI}c9nN|s3ik|3y9*9B`~gp{+tfCd+GX(C%>%neSz!q zy(es21)^=bt&ema_dxm&0~TG%lZfRhu1{vYwtoJZ_pQfKk`MsiEnm91uN=svzV=q3 z8j4GofmX6rwlr|jx$h+PuGiu8OyO(|p>D5n{aU&GysMqPeE!RJ5?cRC-I-VSD)Q&b z#@kR^!sSEn#G5Uq&^HGBK-0s6&a93<(KTe34%ld|Sr#d78Q+d6G}3dwdw;s}}ZQ2F)a6wlW_hbSTWB5Aon0puux$WrqMl#I+XH_@6 z!nGmBG?Z6;+g_og`c@WVf&Q_KSTz({{F&_@mSvzdT@m^hA#i0wXnDx;0FOzPy?sZ2 z@g&eJ+m$$@z;`xJOd=4(a&pd`M~i?(I^~1b;_xaNl5`@)CcMYk zfmcQa0Dl1Gux1pNvVfUOy%V~g@%NXABK!1l`wIB>(`o4?BiBlSf}4n_6`bcZDV-SF zNtG{!@^VR~$*Qqt2cf2}5d!Q%Ou?x0a^6EUjoG!TJd=mXC-%S{o5>abnqqnqmY5|k z57MW!9cE@Mg`7@>n#SB7%Foe(+6J~5b-^NlQ(ivhW&5D2AQ(J8OK0oIEK(VqW}xh; z#;e8bZg|VxkX8xOy^obkECtB({rMkN7Nlgg`DiFFWFuW$ZQ|fat6)Ccwx=D=SRQd~ z^sAf!5za55Yu;whV&h@9QsFnd%zVZ@dXMPGY8jLWbF4w;zt06WAw>-uS)-Y#x7g6QS>FCc%w5wYiY8K6 zfgE|0tAzNqpsvj6u9CK!_kMIeta({`p)St7*-`>7y3J#2nkX3trsC8 z_kXN(Tg%Z zeTKiMh|jd6GC9#hs_2EBu5`J_`3t2@*iNu!U`>tH2E2s$^BYX-^rOqC4O;+2t+tq& z58N$Vmd2oJYVlLo`QyluWu&5*73w+OVy&K=T!5?5Cq7Xu$}LgaF$i&p8)<>etxGR6 zSI%bI9n0vK%f2okRBu?G8g!O9qINL%3^S7c`CE~TNJyKvaEb2;qWnh&; zr>?o;oWj(r`pUvGR3WuQ;(G?wtu^LelWnH^1=&zE7%o+d&(jv1#;{uulk|6|bi zRdc~mm316QF3F52jH9z4M68f?%b5O4`n$}pH_N40SC>00ry;_}K2O1+gZYXz5j@(l zSGlZAdFb=E@LNIOx|tuox!*Jto)dhVOliR}5~7#;I%~F_zo3eb>BR=al#9#vxs6P{ zy65|G4<;qLw!XM{0xNR|F`1G^yVwGVfrk(_CBF~#40o6JUSLDzt|4|JMH$J=%)N303{_#%@a zrG3f&K~UvG9C4j({?zRovdY|WYa_&x;0c8PYg5#d$BFWs8}r3vLc&STMuCy-L@#{a zP}{qgSBzaQHMARxHA{ytoLIGg*6@54%8U}XFB?a;^-0c>Wyj-a)_9U8l!6o3!7amV zk?%VUIuzm^dx04*ifix!CHOs+#IgpxG@fO*@#g-9$f2f z6ysD|nn|xoib|Hj)y1xq9oE4VeLtL&T~Jo<2}>DB*-bqN9S;k_P_mkrY7PDj-5vd< zo}MfMlpmk_G>dqYBK3@KF*Ye+YTvXvyObFk);Z4o#JYRBPV69Q^(&00Kl2aMk@DY& zEIgwo`F|oG}$> zB)zmYeplFhpU)%Hd@>p)$g})%)IHKfc)2y&I$TS2651_*>^c)NGg(iQr8dIf;8B*> o{r}(o|Fr=h&uQ4WLEwK3&;E5uiv5@TuPWeuNPbWQqivS?4-_jOSO5S3 diff --git a/figures/diversity.afdesign b/figures/diversity.afdesign index 807a90415e505b4fb65a4231d594e297b9ef34fc..5491315d4d496a660303ae7fc3569de7f173a1da 100644 GIT binary patch delta 4939 zcmX|FWmJ@n*2SSiKw=m=rKGz%g`tO*M!Gv68eXZPyPH7?0i^{&8l+o58l-DLz|VKx zd%wNT*|E;g^KWODgYqjt0LmXVS!@&(6jv`#Ek;$>w{t{)!(WIK|3{<$oBxwgP{5BC zrvG}hzYF@iTAps+7=M*f$-&wG*|GR@e_dGSX*a0l88y06KRFMHV(M}xS?F7&lUZmk z5p-FZ$XKS}KDV>Ak(+ySrviX>;IWTPHVh>*PP}`^utC5h#>s!gIzb5W^)9`aF`8eKXQt)xOEkD_5P-S>7CM^6cSmyDbblY1>;1nh5wyTwWK<+?J)aOoTC) z$gjSA7Paab?38_6b!y@6FKwK=p?V=$pY`3Da1B*i2FQVS(Yj8jVk&nMiIa;ZcShSMKI{cPZL0K{Czz29V6B1tE_440P6 z!K;2@Q9wVGC(HK9cnD=PN%1WsZwsjHEBOOEl3j=D`~qabMChD3Yp>aGCiSB}MtrF;fpLZEr%pGmJt$K}Dtlf}J58gU+kU_{%`?8{KcpO>^R%z;A?8 zR-y2iU3)lW8>~tnqhgvCp70Yb^ZB#was&hqwaJF?X;T3JNQqZ7XidHNAVn>F)B6Jy zDz<_kZHYu!2@k2IMua{e+WAN$ny4I|bq=;Z>-gf$=$;!gm`(a7+J|zqm2)q-$k6od zlNG1=#h_n^aI*R2?T#u-%W`-h3%r1(rkW!ejKq0Wy*W zHR=AObPK5b%y)RNeL1g{#!K%E1zWtiW1?L1(!dw1W1`LL((1=frH@V6(GzuM%(6%u zFb*&6U*2ixxhW&a8#|!QC5zcK7OamiKI4C?UW?AoSBr?imu-F_BOI4&Py4Nt?=tBg znoKSHk;Bs4*jj$vH<$U`RG`v0++f(zj~And>?{w+D=vg6J9g_hsx5u-d7EnV!H%3W3h-2ri4W==Ed3PT6v!BLPWjcS`vpiv+ z?lbV~U3H~M4J)Gn%lq2+=;L(h<&ABx<;jLXygf`EG0piiK$)* zW=%RBwt5Jh2pl_k$7o<>H(m;(ucv;;3{2e?ENse$sp?Ysma77g?;-8B)&A(wLJZNUq1_s@N z31@y6Gzsn5v4bLP0JRlouuS|ww7bCtL%tLzPlR|XPV>qZ zM6u^py-RE*a<&|rL%RKgQA5u6Iz`_R@ACRSjM(I2=j}ffX)wBV4M-*b!50}VlBjGF z0ZgMqjVX0Ug+tK5)5`=6@XW~!FYwV-HbfF>K5XNRH3H_KXn~gz;oeaY#YD(vM5GI{ zdU<^p3Bg@~#X9!x!mD*!axI?MB<1txkvsq9qn|i5HnAqw$7Fn%g#mj;~d%KN(mN;SG553((o+a zFSjzDj^7xh_h+|7co*oX3bM<$rW?=J!AWVE7bXiFh&t}|u_FHttNokzM@X`{0zb~~ zw^TTcw!b zkEY=shR5FM^B>yq3J*0dyAZKcR96Z24AF7XNjM?b4fZvfN8aUT&RYj4#ju=#5}N)N zEM)?QElJZ@x+|LY0DPq9IfkIBTkk@jLq3K=r3R{60mgEPfm0E1AoE_*w`$%NbP+ip zZJZwT6gYDfJ(G2IarVbZ%H|pzy8e#~bkft$I%J6Q)a78U)HJ#Ld#OzwwvOGs1GLco zel$h-8^46_fcIo_mUEnR?Mr*~C!vaETE~0O3EM_HZY(Fh7f2eF`TnqnD+_1>mNdcq z_9jdBtCBDg%{__tRkK8KSI8pm6sG|Gl$2k7_IUxz$7!mHAJE4dB! z(0xDE5e1SNvo$DMJBuiDDT!=X9@f#Fz>73^kr}lA$2+N9EPPcT!51oBJmM7xg~YWpP{er z0u|8yFmI7cP`nYd6@|Vyv_A_lQHRPepR0&ct*Q6>+1H;i|LnjzJycXz);eVd3AIlY zk@2G59n2|EwYMZQTQR2|Ix@fm0jR~ywRhuVOQ`|ra=MgmUGlhu@y$YPtDYME5afG^_5D^Igt#5A8457B7?3kK4EM5^qO zPlqfPS$P2LZ3IyrP*HKYVRlzspOwenNA~(j(H=P+CRlpBq>j8g7}jnQNoi{20^EKZ zmsupR>IkJjJF_QWGrMvqy}-W>3_X3l@Ra1n^%J<2a-hDQq7^#he)L|=L=ZRTb4Qs7 zkxF@0|LCo{q1qT_Hu#hh2>AT&2Dzwj-u5VHm)H1iPC!qS+qhF|`6zXck7B5ib8w$t z+>L9^FC|CXExqsdYFnvgyJ~K@gJ3S;b~`s|;%b&pMWZQa3%V^HP9ka{@dfU5;Hx6D zkAUi_c$Z8Ie*ypa#L~EJn&Z^%@j8{ypnRb&B51oEk(B%CT6^skTyMcZR7YCZ$Bohh znbXv+Oss;VYrOWSRaSU6E%Eje6$nZhd)68hn6{_4C1W zR?#SYYj@23h@rVh-nVxFefp!ahFxeJ3_!Eaoetq`9s}ait<(5RE#oT$P!*1!1nYs1 zlTViizwcmfW>Q-}MoeR!aSEFg_9f8zK2}lTCeBXI%9;E6{~2pz0n%l3vZulKwVT%8 zB%YSv;r7aH8NJb5p#@1L4`@UxVPPba2;5O_emR-oV=QE_cm+@jOLmNtyNcY?|JDMM%N!e!DIL-s@e6JDy833xBWXQ)R(d-;VFD zS24xR>auxnDh%%y`$t}Khj`!hQcGNRxSTpWh`Rm=8f_I^r@8+YADawy`d#&!Od4*b zzJFqW@nFlPuD<&ay`g^XeYH+b2V7j;9Mcv_1UM9-D(Fx)`g^a=^z~8ny&?zSAk4p4 z+L3klIByl*q<8b&?^nduYY3qK@PV#26#(wu8wyLs1mlc*t`qSsi7E*_a)usT-HE8) zvPP!ge{s4es2n-$A}}Y$$Xs2y0s^-Yy99-~Pj$c=>yH+-5bmp$-^6{hK*}wZsIUEG zL?b2*5J%oCU1?59_3*wG*LK%QgZJ_>s4(Dx>6cYM-8{l-R)fxQxv|IC6kJKwH z_@OhMo-4hy&Eu3Q5P{Q@mD^dDT|e`?!?Co?g>iZFj?g1>H>!c7&~Bt~2lEFiEf!_+ zt$8RJINwSUeACH*a@@`U>}|Gq?&w@D7nFtCJ>4q5HAr5R7TXkAAn;@$H;8+yeAm{JpI-|12ceK0GdRFv! zN~U%r@N46_ltL~ce7oIrc2$f8Y0KS4-rJ1!J6S*hQuh2%0nm*ED3aL1Z4%Bn<0`Of zAeKXoW=AEWy@s2v9(_zK8jBFme)NQ4{?>aNODvK_p?VIUFc#cPx-bZz`wib_mreTa zt+&*vCJy8+8QSj_wVq83l_oEs`K} z1okjUe|ng(PRRlwdPPv&W$B|J_{^Lv;f;L_20v`$Qg1U9;$9g5+94EwS5GzU#JDbB zjHZavNC-#K4p^*|^9K!BeQwkDXGQt!JnZ^|6-{Y@ArpN4-H>BwO=W7!V1E^X5+;^N zdaQizb&3o>K57UF+b6Gpiws|$x)>%o|WY~ zemSKo8|CY&MIkn%jE*J~IaAzQP)zUe9j>7jN|C;f-Olv9oeAAaDZsr;rl1KZ%STcF zn594VP+1ME_&%3;zC)p~&wuPwqF-Bm-1O^AD`YSd8=TCt#DHXEc(RQspEC2A+n$t; z&sz^T5)zS?b9s9m_GNmJzSy26w3;t>5L33C(9FSa0-+$e*X+J14_X9Q_qVcrxBBW> zGfSxWi97lY?pihj8`E_5Aj442MXR!IZ`PF~wjw6L>AQHW(D%9v?Pc+or7kY<8cda4 z{_ugxD6_B4om_|l0q+9^ipFJmL*CI{+gQ@7h)Gq#@8olbCA2<-iAmN?Mr4ltaeiBo zWupm}pVYI4Hd>84KQrq*e^`)NnkCAlE}u(E`Gr3B`jv#xYX1vFBMQ4+8R2XfNtCY5 zXPB2bAT&*skX64xLOF3$Tg{=rwd@OR6Sr^k$!3OOw?nAOwCV^3vok&>1>61efe+CK zkz|vCq|Jh_cF&v+M!1_&zq%2JVO^*NC=Qo(CXHGNkfT0yissPr%sj>o$Ld@Q%WU&> z*=i_7Jq?>`&UCz`5W{Qy$~}{guQn#JWfjK+gmxU$c=4?}(%jr<4mq!R)!aXktQm}p z;Q(vFm&+kV>YY2%W03F%-<21am|lRC?^0~e;x-d49{>BpnqEXfq`&1Iw^@+)0@-e( z$@&$v#tNyqyN&}-+l|x|b|s48d)TYFN9qNs;;g3}7F8}331U_M%>O0mzhk@LU>gG{ z<_sGT#$kz*KBjk=Qg}vM9lS=3`fRtPSW?`Sr)Dh`C3l;Gu^dO^z#)Pw(5X{5n*Xys z|EH10aI69GB+{s}B>PBx?<*MlUPE?EgTI9hiKVBkdCiee@PjVwr<3Q_^g*ZA7q(sm zZ45J>>H@j~w&pAQ>0#NFKv45qt)c{wFoj^Bou$l|;_<%owOjNfny%p{D*R@)LTyoR z3*qw8Woq;KxsPVgc`^;UbmZ?r(bY|VlY!R&cGB<>**MD9dKeHD%05lxFDX`qLR*`?al)?m>*|PLY?3Ah zDPa5KrBdvmMgeQ&XvC*%dHg&e5#lTa>9z2x2Jy1HC*#u&5H?}!TUBdyUP8Ev^}}MB z2PNj*rN#8Mp?J0XF+iBXR|8ixF~{$*#r1hxd*29?Lb>{WZ%+9Nh@_cd4sCzJLp_m`43b>1|YAzv==IU)rJg?4B@oeKy;~K_r z#*P)uwF`!hjY5DeSZ;!Ekg*|}-r-)XhQ4}?OQJnUvqTD#{l<`gPI8#KQ8lqw)TMUY z1#xw^lKU`&8okT6RtF#x{8>S_w2C1TOs}TM&zGXD=uBD0!}uc9aIZ-eopRrZtaI+W zSi+_?IdhEpY8uq57LM#{!tcpxIvD4u9Y5ky3Ucz5H8)pORFvC#sv5em?o?_PnTZYS zIr5RBDtR=+ruzLGiKvU1QY)>qN<#%9QxTSm$=(C)?&^7O3<1Mjs#*a-aKoGBVPO{# zXisRftnkv?10D)JA2?_idGnYX9!hVX{vm#5uAY3K>_YKC%xR`#)L$wUbfSTa6x8&W z#K&z{RdfZi1Kkzpy=7I%YGdl-s_9?E z{ulr2VquZ67@GWtgZ^zTFLxiDe>l6cql+K*z9eZn)l|b9yHH;oBRn1}l1QZ_n*~#@ zoBA79t^pUQ@EmDI4N0k~ee&GGw*d}yJHYY{vy##!H~D3)6)#V0h^$bp6<};r7rWNjCRngMG3KiP5U%k1KhMHTh|{Y&VPH4WjCMy7P3E(PRnkU zw^3D3m42|&dKdkjBE0;e{uG_3r6HMO`EnP{JjYa2c!l&|j&~8lbWVwg)r0}i#|JLx~eSupilag{VxUM?ihj70Bw8^^vt^SBen=}ITgOI(9=|Xk_pXvYwbCQxLvvZ8iJ01wn&R=>SgBLv#Gr>)1C(* zvO3hRl(o2Fh%iM4xV5i1R3$q(?-TGSIw&=|lbYMZazLEdVxsZ(YHm-)bTA>K35`LztnpCV?WOBdy!Fg!oBz*a%LdB>cbYGBDI0;#zT}*vnMw2_wVy_7Ji*SCq5fS@rnUn%pz$k3m zjY+MZ-iKDH=-neIHE%fE9g2L>U9?F@HH??i30YIalfTWP;dvoUzQmRTXrMHMJ1t1ZnEzquH41 z1L7X1K;6t?#)z-LTK9xk-t^O%sxYp|#6k&}r7jC1F10wC&7bZ{mwvK zA4r_1HA3u~MoY8QMe&M*hTaBy{xiWJvV&aLH;Cb;mZ925wgelc3q>llyeBS|6>ydb zM^^1I_g3bW1&v>sXLi>|B@E0(gQlEXaZq?V7{NWQCaw$2eQ;1LC0*meNKd+OkHIUWe?B-ay8D3S>?(xHnOK3uc2`|L^sVHI=w~Dj zfxKa&mCg@F8~B?+JDmbAGU{JxU{lqXD)fEBD1TB?iYkuIS-J;?6e3QRfz)gfkBRYn zkc0B&kC%S~d_EcmR&7oO&k&5C^j=i<8g`ezy)5piLg=30+=WpSFwZ&u9{g3F@Jm1V zE7{Q1FMXGdcp7+7)A&?jqVZzwQMh;Bta~1!5}waUnGhvsR%TKoDkjRAh$7SkBKU@r8;z$VDa) zVzy)kSO_fPw_WRM2@#)lBDn;jAI1ATu=*IP)^`JB$SRu+x|`O0DbMo~0#@OD%lpPN z$72SVeEz{cUW&B6lfzy^=i-+NpkJ=58MJoFHcw=YH0yJc`QyQDD!ui zcrggeErVY&0rU0ZS|v08%F649KNr_K*^e6zM~&t?T4TU^!zWyvh(8#vZP;ty^kIPh zfH7wuj=*qXS~xb?stPJmsA!$SEfnElDFrgpsOYv2W~gYQr#v%CNK88_ zy6@BkSJrk>Xv2Be?_osZYw#K1c2a%6GkV&kPbsadfX|fb!w;w@q2gVUdbcQ6 zf?y#B(<3RrTcM+onf=K~IRU^a%=*yZ=^oWrHUd5Uvtf@vIIv-ljR#1Vi1I7h!5?g&J)?Y+HF;Ce%;PX=5Oo63)M)=5nJHR13Wih<;X4+%$RsiwpON1 zFrLSq6idF8o!D8F`E#U8eMbI7>mf7%b#EQ!HmO2?xN&`d!nTIh&=QzD1`RmKVwlMZ z3`)M=CDAzF)Al$lk@Ud9RD3QLxZ%17gcmaCE%e%pyDvmY`#o~YE27#1R{koiQIcGA ziA2HPr@)5yVA%V*KAEoTUT@oAtH`&VM9!qmtR1) zr;O>4e1SYGPCbN0!K=cqC$x5~?PvvbC$jaG+3dJ>5LohFxGT6mFdI?nmkiA#v2I%` zH-y-Y9oj9;9JgA}Z_^s_s65f<_g@K9BR^Ok$Zc$EupRe2UCe5%y!^X7lOr4C^71;N z!XyY7gydBe4BXO^`>5O9M`XmK-5T#CsT6a5x8HUrKM^?aeW&WgN~3~o38hZ@fO9=X z?r=JB3M`p69t#8x=F!NNN1kaxwYGp z+z;FbH`-{fku5GO6nkPz=;e-Onr}+oCH4=iJV#sHCdKJD=|2}^gB|M`tgcst@qXHC zEpujU`;b_b3u$H$r-!T?})x0^Ubj3$F@B?pHFYcL_CKLYan_Y<{5=qp-~T zg`()JJVLiMJR9{X6|nH$|F~HkCAe_}v$n9b2r%J1)5h+an^flYrn1PmeQ)#n$);9u zFeBufY!z=O^~ZF&Z~o$umDSTyFkBKxL0sp=^mQIACruMZ$Vk{?9AS=<$PW=E8Ngkn zN;lLlQ%s9!Aphb+k`leQ5T&hLXb%)0w5|&Zy=3~BCpVCdFA>Y<^q6+fSVyxIdZD&K z;nqm~&tLgv(pYE#;quRTk%C0G3QLnzxuxu#@%n`8vB+f179?^hI zZN}s6o)%RfwP&S&*EBL3NcCo|98PqBqxScVfcDHkFv z2!A{adz`{Wy30Xb42$?&827UOUxA3f)9%fM`D1iK>R!?wd|q*LojPZe5h*)@LpStkED0dM-!vJ!tSe zS+p%~Lck+?VXZp--yUXrmwNh-wVOCx^#2 zL?%0!4}Ch9=@<`u+o)?`4lgvR_6G|NKBc(450N^K)^l?*zF-MVdNU{L3uq#JBHFRb zswtNqZ!V*^EXaq8@Q-3cK-o+LuJ28`Mcs!&Rc+6;TXx>};5X4^Nu90@(c)ZQ&u(Tw zZblD@<}yUFlYhlIrHJ~uh!3$%v4C_(mAS>wbslOEbvCCM*M77DJUXCl#j`a{256`b zDH)7^-92F=EiX%tbDQl2KN7+0!pdvUa5+k8`bs~Pu8mtd8swjhBb*A*{1mlWN z8Fr;(>Y4U__XU^QxQ)?K-RAmPNCv;Qx>O2J2R>KSQKUo$#}%zNPDIm)a2DOq81ur! zQN3ZQH7iyA;JRCY-}?kLCAB{FY&-*Hud{K>-7Jvhn{rov+1qd}^B9-_(jnZJBY)X9 z&+YELFkjDwb_QY6gc&-wJ>REBv0LpgtIhoVX6!e|jm^*DbS)%^< zRCg14{x0S1d}w#g2F;L!)j%q? z^6$3&d~K~?BWV!YuLXhJIByl3o^QS-dal^aS)%D_qH)YuGN~|GC=^Ts`MvsL;;u$1 ztX;SN+k*OAVj0lqf|3B5Q37`gi(`=%&ut$zmRU}}$2~vZ8}6b)IM$T$@JydR9ajyR zHf|_Qqf>eXU?IB9Xvz2vnm^0c3LY)PD@p5CWKU+_f~ZC-AH@d9 z$E#SR{C*sC46YhorAQ~7#sr_@OvyTqa-`=Wg`(=KUz!V1j@doNu><8YMwg3tgr^<}(#<}V!XeAu8j`8}IKT8Vd%!UjuaqGN(BC;IBv#Wb7vwov_DnfleoAcb zIU8h?I^x`SD#k5tXZC8ZpKEx{F-L4?>ay#@q$3on@B;cmFClg)LJ?6qzpSq5TV!f+ zoSd%fBH~SV1%W}L1AH{dg9h0+^q7c2w=%frby0#c@C@&y@dIiH3R@4uBLk)Z z7U=uO-1Ko%?T$`pX)1UtmpO4LA}W~N7&HWW=Lp23*#lK9qP zS+N1FybV#pgm@Y|iS!`RHUf}g%3IJkqb9foO`h_*Uz*(~l$~oifsi2Rs9=*aCs6lg zXK$ky`MmC8`#>6fWnp3Gxo>lh`(~0$49~uy;_Iy0#%Yt-bqRj#2sP?a<%ChUHXu&P zy<^ z>AP_fEXQMyW!iXomPF%f=jrKxq7Q781%}RVd=8l4q40Q(&3yKyJ0c5H7AN2{6LGMs zOOA=TeCKPrT_tlSmzyqtm`R&9MX9f|^HLn}z5BKa8Fy~7NY`8atKxubU( g|2v7r`(ONDsrs*Hukx??Z%VZSmHJr+p>hWP2b#$-p#T5? diff --git a/figures/subsampling.svg b/figures/subsampling.svg index aaebcec..c6073fb 100644 --- a/figures/subsampling.svg +++ b/figures/subsampling.svg @@ -1,7 +1,7 @@ - - + + @@ -19,30 +19,30 @@ - + - + - + - Subsampling + Subsampling - Subsampling + Subsampling - Subsampling + Subsampling - - - + + + diff --git a/index.Rmd b/index.Rmd index cf2940b..9f911cd 100644 --- a/index.Rmd +++ b/index.Rmd @@ -28,6 +28,7 @@ opts_chunk$set(echo = FALSE, # Summary +- The MetabarSchool Package - What do the reading numbers per PCR mean? - Rarefaction vs. relative frequencies - alpha diversity metrics @@ -35,6 +36,28 @@ opts_chunk$set(echo = FALSE, - multidimentionnal analysis - comparison between datasets +# The MetabarSchool Package + +## Instaling the package + +You need the *devtools* package + +```{r eval=FALSE, echo=TRUE} +install.packages("devtools",dependencies = TRUE) +``` + +Then you can install *MetabarSchool* + +```{r eval=FALSE, echo=TRUE} +devtools::install_git("https://git.metabarcoding.org/MetabarcodingSchool/biodiversity-metrics.git") +``` + +You will also need the *vegan* package + +```{r eval=FALSE, echo=TRUE} +install.packages("vegan",dependencies = TRUE) +``` + # The dataset ## The mock community {.flexbox .vcenter .smaller} @@ -68,6 +91,8 @@ data("positive.samples") ## Loading data ```{r echo=TRUE} +library(MetabarSchool) + data("positive.count") data("positive.samples") data("positive.motus") @@ -94,6 +119,8 @@ positive.count[1:5,1:5] ## Loading data ```{r echo=TRUE} +library(MetabarSchool) + data("positive.count") data("positive.samples") data("positive.motus") @@ -119,6 +146,8 @@ head(positive.samples,n=3) ## Loading data ```{r echo=TRUE} +library(MetabarSchool) + data("positive.count") data("positive.samples") data("positive.motus") @@ -169,7 +198,7 @@ positive.motus = positive.motus[are.not.singleton,] $`r nrow(positive.count)` \; PCRs \; \times \; `r ncol(positive.count)` \; MOTUs$ matrix -## Not all the PCR have the number of reads {.flexbox .vcenter} +## Not all the PCR have the same number of reads {.flexbox .vcenter} Despite all standardization efforts @@ -240,13 +269,19 @@ positive.count.rarefied = rrarefy(positive.count,2000) ## Rarefying read count (2) {.flexbox .vcenter} -```{r fig.height=3} +```{r fig.height=4} par(mfrow=c(1,2),bg=NA) hist(log10(colSums(positive.count)+1), main = "Not rarefied", + xlim = c(0,6), + ylim = c(0,2300), + breaks = 30, xlab = TeX("$\\log_{10}(reads per MOTUs)$")) hist(log10(colSums(positive.count.rarefied)+1), main = "Rarefied data", + xlim = c(0,6), + ylim = c(0,2300), + breaks = 30, xlab = TeX("$\\log_{10}(reads per MOTUs)$")) ``` @@ -325,11 +360,11 @@ knitr::include_graphics("figures/diversity.svg") @Whittaker:10:00



-- $\alpha-diversity$ : Mean diversity per site ($species/site$) +- $\alpha\text{-diversity}$ : Mean diversity per site ($species/site$) -- $\gamma-diversity$ : Regional biodiversity ($species/region$) +- $\gamma\text{-diversity}$ : Regional biodiversity ($species/region$) -- $\beta-diversity$ : $\beta = \frac{\gamma}{\alpha}$ ($site$) +- $\beta\text{-diversity}$ : $\beta = \frac{\gamma}{\alpha}$ ($sites/region$) @@ -410,25 +445,19 @@ kable(data.frame(`Gini-Simpson`=GS), kable_styling(position = "center") ``` -## Shanon entropy {.smaller} +## Shannon entropy {.smaller} -
-Shanon entropy is based on information theory. +Shannon entropy is based on information theory: -Let $X$ be a uniformly distributed random variable with values in $A$ +
+$H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}$ +
-$$ -H(X) = \log|A| -$$ -
-
-
+if $A$ is a community where every species are equally represented then $$ -H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i} +H(A) = \log|A| $$ -
-
```{r out.width = "400px"} @@ -441,7 +470,7 @@ H = - rowSums(environments * log(environments),na.rm = TRUE) ``` ```{r} -kable(data.frame(`Shanon index`=H), +kable(data.frame(`Shannon index`=H), format="html", align = 'rr') %>% kable_styling(position = "center") @@ -452,12 +481,12 @@ kable(data.frame(`Shanon index`=H),
As : $$ -H(X) = \log|A| \;\Rightarrow\; ^1D = e^{H(X)} +H(A) = \log|A| \;\Rightarrow\; ^1D = e^{H(A)} $$
-where $^1D$ is the theoretical number of species in a evenly distributed community that would have the same Shanon's entropy than ours. +where $^1D$ is the theoretical number of species in a evenly distributed community that would have the same Shannon's entropy than ours.
@@ -548,10 +577,10 @@ exp_q = function(x,q=1) { } ``` -## Generalised Shanon entropy +## Generalised Shannon entropy $$ -^qH = - \sum_{i=1}^S pi \times ^q\log pi +^qH = - \sum_{i=1}^S p_i \; ^q\log p_i $$ ```{r echo=TRUE, eval=FALSE} @@ -616,7 +645,7 @@ abline(v=c(0,1,2),lty=2,col=4:6) - $^0H(X) = S - 1$ : the richness minus one. -- $^1H(X) = H^{\prime}$ : the Shanon's entropy. +- $^1H(X) = H^{\prime}$ : the Shannon's entropy. - $^2H(X) = 1 - \lambda$ : Gini-Simpson's index. @@ -1074,15 +1103,15 @@ BC_{jk}=\frac{\sum _{i=1}^{p}(N_{ij} - min(N_{ij},N_{ik}) + (N_{ik} - min(N_{ij} $$ $$ -BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}} +BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}} $$ $$ -BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{1+1} +BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{1+1} $$ $$ -BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}]N_{ij} - N_{ik}| +BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}|N_{ij} - N_{ik}| $$ ## Principale coordinate analysis (1) {.flexbox .vcenter} @@ -1109,7 +1138,7 @@ plot(guiana.bc.pcoa$points[,1:2], xlab="Axis 1", ylab="Axis 2", main = "Bray Curtis on Rel. Freqs") -plot(guiana.euc.pcoa$points[,1:2], +plot(guiana.euc.pcoa$points[,1],-guiana.euc.pcoa$points[,2], col = samples.type, asp = 1, xlab="Axis 1", @@ -1123,7 +1152,7 @@ plot(guiana.jac.1.pcoa$points[,1:2], xlab="Axis 1", ylab="Axis 2", main = "Jaccard on presence (0.1%)") -plot(guiana.jac.10.pcoa$points[,1:2], +plot(-guiana.jac.10.pcoa$points[,1],guiana.jac.10.pcoa$points[,2], col = samples.type, asp = 1, xlab="Axis 1", @@ -1162,6 +1191,58 @@ plot(0,type='n',axes=FALSE,ann=FALSE) legend("topleft",legend = levels(samples.type),fill = 1:4,cex=1.2) ``` + + ## Comparing diversity of the environments ```{r} diff --git a/index.html b/index.html index eeb275f..078b191 100644 --- a/index.html +++ b/index.html @@ -4,7 +4,6 @@ Biodiversity metrics and metabarcoding - @@ -82,6 +81,10 @@ transition: opacity 0.6s ease-in 0.4s; opacity: 0; } +/* https://github.com/ropensci/plotly/pull/524#issuecomment-468142578 */ +slide:not(.current) .plotly.html-widget{ + display: block; +} @@ -106,14 +109,31 @@

Summary

    +
  • The MetabarSchool Package
  • What do the reading numbers per PCR mean?
  • -
  • Rarefaction vs. relative frequencies
  • +
  • Rarefaction vs. relative frequencies
  • alpha diversity metrics
  • beta diversity metrics
  • multidimentionnal analysis
  • comparison between datasets
+

The MetabarSchool Package

+ +

Instaling the package

+ +

You need the devtools package

+ +
install.packages("devtools",dependencies = TRUE)
+ +

Then you can install MetabarSchool

+ +
devtools::install_git("https://git.metabarcoding.org/MetabarcodingSchool/biodiversity-metrics.git")
+ +

You will also need the vegan package

+ +
install.packages("vegan",dependencies = TRUE)
+

The dataset

The mock community

@@ -609,14 +629,19 @@ Lotus corniculatus

The experiment

    -
  • 192 PCR of the mock community using SPER02 trnL-P6-Loop primers

  • +
  • 192 PCR of the mock community using SPER02 trnL-P6-Loop primers

    + +
    • 6 dilutions of the mock community: 1/1, 1/2, 1/4, 1/8, 1/16, 1/32

    • 32 repeats per dilution

    • +

Loading data

-
data("positive.count")
+
library(MetabarSchool)
+
+data("positive.count")
 data("positive.samples")
 data("positive.motus")
@@ -880,7 +905,9 @@ sample.TM_POS_d16_2_a_A1

Loading data

-
data("positive.count")
+
library(MetabarSchool)
+
+data("positive.count")
 data("positive.samples")
 data("positive.motus")
@@ -992,7 +1019,9 @@ sample.TM_POS_d16_1_b_A2

Loading data

-
data("positive.count")
+
library(MetabarSchool)
+
+data("positive.count")
 data("positive.samples")
 data("positive.motus")
@@ -1212,11 +1241,11 @@ positive.motus = positive.motus[are.not.singleton,]
  • positive.count is now a \(192 \; PCRs \; \times \; 5579 \; MOTUs\) matrix
  • -

    Not all the PCR have the number of reads

    +

    Not all the PCR have the same number of reads

    Despite all standardization efforts

    -

    +

    Is it related to the amount of DNA in the extract ?

    @@ -1227,7 +1256,7 @@ positive.motus = positive.motus[are.not.singleton,] boxplot(rowSums(positive.count) ~ positive.samples$dilution,log="y") abline(h = median(rowSums(positive.count)),lw=2,col="red",lty=2) -

    +

    @@ -1268,7 +1297,7 @@ Only 7.4% of the PCR read count variation is explain by dilution

    Rarefying read count (2)

    -

    +

    Rarefying read count (3)

    @@ -1284,16 +1313,16 @@ are.still.present[1:5]
    ## are.still.present
     ## FALSE  TRUE 
    -##  1942  3637
    +## 1886 3693

    Rarefying read count (4)

    par(bg=NA) 
     boxplot(colSums(positive.count) ~ are.still.present, log="y")
    -

    +

    -

    The MOTUs removed by rarefaction were at most occurring 21 times

    +

    The MOTUs removed by rarefaction were at most occurring 13 times

    The MOTUs kept by rarefaction were at least occurring 2 times

    @@ -1306,7 +1335,7 @@ positive.motus.rare = positive.motus[are.still.present,]
    -positive.motus.rare is now a \(192 \; PCRs \; \times \; 3637 \; MOTUs\) +positive.motus.rare is now a \(192 \; PCRs \; \times \; 3693 \; MOTUs\)
    @@ -1340,9 +1369,9 @@ positive.motus.rare is now a \(192 \; PCRs \; \times \; 3637 \; MOTUs\)



    Whittaker (2010)



      -
    • \(\alpha-diversity\) : Mean diversity per site (\(species/site\))

    • -
    • \(\gamma-diversity\) : Regional biodiversity (\(species/region\))

    • -
    • \(\beta-diversity\) : \(\beta = \frac{\gamma}{\alpha}\) (\(site\))

    • +
    • \(\alpha\text{-diversity}\) : Mean diversity per site (\(species/site\))

    • +
    • \(\gamma\text{-diversity}\) : Regional biodiversity (\(species/region\))

    • +
    • \(\beta\text{-diversity}\) : \(\beta = \frac{\gamma}{\alpha}\) (\(sites/region\))

    \(\alpha\)-diversity

    @@ -1583,10 +1612,10 @@ Environment.2 -

    Gini-Simpson's index

    +

    Gini-Simpson’s index

    -

    The Simpson's index is the probability of having the same species twice when you randomly select two specimens.

    +

    The Simpson’s index is the probability of having the same species twice when you randomly select two specimens.

    \[ @@ -1597,7 +1626,7 @@ Environment.2

    \(\lambda\) decrease when complexity of your ecosystem increase.

    -

    Gini-Simpson's index defined as \(1-\lambda\) increase with diversity

    +

    Gini-Simpson’s index defined as \(1-\lambda\) increase with diversity

    @@ -1663,24 +1692,20 @@ Environment.2 -

    Shanon entropy

    +

    Shannon entropy

    -
    -

    Shanon entropy is based on information theory.

    +

    Shannon entropy is based on information theory:

    -

    Let \(X\) be a uniformly distributed random variable with values in \(A\)

    +
    -

    \[ -H(X) = \log|A| +\(H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i}\) + +

    + +

    if \(A\) is a community where every species are equally represented then \[ +H(A) = \log|A| \]

    -


    - -
    -

    \[ -H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i} -\]

    -
    @@ -1701,7 +1726,7 @@ H^{\prime }=-\sum _{i=1}^{S}p_{i}\log p_{i} -Shanon.index +Shannon.index @@ -1747,15 +1772,15 @@ Environment.2 -

    Hill's number

    +

    Hill’s number

    As : \[ -H(X) = \log|A| \;\Rightarrow\; ^1D = e^{H(X)} +H(A) = \log|A| \;\Rightarrow\; ^1D = e^{H(A)} \]

    -

    where \(^1D\) is the theoretical number of species in a evenly distributed community that would have the same Shanon's entropy than ours.

    +

    where \(^1D\) is the theoretical number of species in a evenly distributed community that would have the same Shannon’s entropy than ours.

    @@ -1851,7 +1876,7 @@ Environment.2

    Impact of \(q\) on the log_q function

    -

    +

    And its inverse function

    @@ -1871,17 +1896,17 @@ Environment.2 (1 + (1-q)*x)^(1/(1-q)) } -

    Generalised Shanon entropy

    +

    Generalised Shannon entropy

    \[ -^qH = - \sum_{i=1}^S pi \times ^q\log pi +^qH = - \sum_{i=1}^S p_i \; ^q\log p_i \]

    H_q = function(x,q=1) {
       sum(x * log_q(1/x,q),na.rm = TRUE)
     }
    -

    and generalized the previously presented Hill's number

    +

    and generalized the previously presented Hill’s number

    \[ ^qD=^qe^{^qH} @@ -1908,22 +1933,22 @@ qs = seq(from=0,to=3,by=0.1) environments.hq = apply(environments,MARGIN = 1,H_spectrum,q=qs) environments.dq = apply(environments,MARGIN = 1,D_spectrum,q=qs) -

    +

    Generalized entropy \(vs\) \(\alpha\)-diversity indices

    • \(^0H(X) = S - 1\) : the richness minus one.

    • -
    • \(^1H(X) = H^{\prime}\) : the Shanon's entropy.

    • -
    • \(^2H(X) = 1 - \lambda\) : Gini-Simpson's index.

    • +
    • \(^1H(X) = H^{\prime}\) : the Shannon’s entropy.

    • +
    • \(^2H(X) = 1 - \lambda\) : Gini-Simpson’s index.

    -

    When computing the exponential of entropy : Hill's number

    +

    When computing the exponential of entropy : Hill’s number

    • \(^0D(X) = S\) : The richness.

    • \(^1D(X) = e^{H^{\prime}}\) : The number of species in an even community having the same \(H^{\prime}\).

    • -
    • \(^2D(X) = 1 / \lambda\) : The number of species in an even community having the same Gini-Simpson's index.

    • +
    • \(^2D(X) = 1 / \lambda\) : The number of species in an even community having the same Gini-Simpson’s index.


    @@ -1941,7 +1966,7 @@ environments.dq = apply(environments,MARGIN = 1,D_spectrum,q=qs)
    H.mock = H_spectrum(plants.16$dilution,qs)
     D.mock = D_spectrum(plants.16$dilution,qs)
    -

    +

    Biodiversity spectrum and metabarcoding (1)

    @@ -1950,11 +1975,11 @@ D.mock = D_spectrum(plants.16$dilution,qs) FUN = H_spectrum, q=qs) -

    +

    Biodiversity spectrum and metabarcoding (2)

    -

    +

    Biodiversity spectrum and metabarcoding (3)

    @@ -1963,7 +1988,7 @@ D.mock = D_spectrum(plants.16$dilution,qs) FUN = D_spectrum, q=qs) -

    +

    Impact of data cleaning on \(\alpha\)-diversity (1)

    @@ -1999,7 +2024,7 @@ positive.clean.H = apply(positive.clean.count.relfreq, FUN = H_spectrum, q=qs) -

    +

    Impact of data cleaning on \(\alpha\)-diversity (3)

    @@ -2008,7 +2033,7 @@ positive.clean.H = apply(positive.clean.count.relfreq, FUN = D_spectrum, q=qs) -

    +

    \(\beta\)-diversity

    @@ -2102,7 +2127,7 @@ d_c =& \max\limits_{1\leqslant i \leqslant n}\left|a_i - b_i\right| \\ \end{align} \]

    -

    For the fun… ;-)

    +

    For the fun… ;-)

    You can generalize those distances as a norm of order \(k\)

    @@ -2140,7 +2165,7 @@ d(x,z)\leq \max(d(x,y),d(y,z))
  • A metric induce a metric space
  • In a metric space rotations are isometries
  • This means that rotations are not changing distances between objects
  • -
  • Multidimensional scaling (PCA, PCoA, CoA…) are rotations
  • +
  • Multidimensional scaling (PCA, PCoA, CoA…) are rotations
  • The data set

    @@ -2167,7 +2192,7 @@ data("guiana.samples")
    s = tag_bad_pcr(guiana.samples$sample,guiana.count)
    -

    +

    guiana.count.clean = guiana.count[s$keep,]
     guiana.samples.clean = guiana.samples[s$keep,]
    @@ -2182,7 +2207,7 @@ guiana.samples.clean = guiana.samples[s$keep,]
    s = tag_bad_pcr(guiana.samples.clean$sample,guiana.count.clean)
    -

    +

    guiana.count.clean = guiana.count.clean[s$keep,]
     guiana.samples.clean = guiana.samples.clean[s$keep,]
    @@ -2197,7 +2222,7 @@ guiana.samples.clean = guiana.samples.clean[s$keep,]
    s = tag_bad_pcr(guiana.samples.clean$sample,guiana.count.clean)
    -

    +

    guiana.count.clean = guiana.count.clean[s$keep,]
     guiana.samples.clean = guiana.samples.clean[s$keep,]
    @@ -2264,7 +2289,7 @@ xy = xy[,1:2] xy.hellinger = decostand(xy,method = "hellinger")
    -

    +

    @@ -2284,15 +2309,15 @@ BC_{jk}=\frac{\sum _{i=1}^{p}(N_{ij} - min(N_{ij},N_{ik}) + (N_{ik} - min(N_{ij} \]

    \[ -BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}} +BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{\sum _{i=1}^{p}N_{ij}+\sum _{i=1}^{p}N_{ik}} \]

    \[ -BC_{jk}=\frac{\sum _{i=1}^{p}]N_{ij} - N_{ik}|}{1+1} +BC_{jk}=\frac{\sum _{i=1}^{p}|N_{ij} - N_{ik}|}{1+1} \]

    \[ -BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}]N_{ij} - N_{ik}| +BC_{jk}=\frac{1}{2}\sum _{i=1}^{p}|N_{ij} - N_{ik}| \]

    Principale coordinate analysis (1)

    @@ -2305,17 +2330,36 @@ guiana.jac.50.pcoa = cmdscale(guiana.jac.50.dist,k=3,eig = TRUE)

    Principale coordinate analysis (2)

    -

    +

    Principale composante analysis

    guiana.hellinger.pca = prcomp(guiana.hellinger.final,center = TRUE, scale. = FALSE)
    -

    +

    + +

    Comparing diversity of the environments

    -

    +

    Bibliography