From e3d922e1035361b2b26423ab5acab89ea018a6d6 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Mon, 20 Apr 2009 08:38:41 +0000 Subject: [PATCH] Merge of eric-test branche to the trunk git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@200 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- src/ecoPrimer | Bin 0 -> 67832 bytes src/ecoprimer.c | 293 ++++++++++---- src/libecoPCR/ecoError.P | 15 + src/libecoPCR/ecoIOUtils.P | 15 + src/libecoPCR/ecoMalloc.P | 15 + src/libecoPCR/ecodna.P | 5 + src/libecoPCR/ecofilter.P | 5 + src/libecoPCR/econame.P | 15 + src/libecoPCR/ecorank.P | 15 + src/libecoPCR/ecoseq.P | 19 + src/libecoPCR/ecotax.P | 15 + src/libecoprimer/Makefile | 2 + src/libecoprimer/aproxpattern.c | 4 +- src/libecoprimer/ecoprimer.h | 148 ++++++-- src/libecoprimer/hashsequence.c | 5 + src/libecoprimer/pairs.c | 350 +++++++++-------- tools/ecoPCRFormat.py | 651 ++++++++++++++++++++++++++++++++ 17 files changed, 1308 insertions(+), 264 deletions(-) create mode 100755 src/ecoPrimer create mode 100644 src/libecoPCR/ecoError.P create mode 100644 src/libecoPCR/ecoIOUtils.P create mode 100644 src/libecoPCR/ecoMalloc.P create mode 100644 src/libecoPCR/ecodna.P create mode 100644 src/libecoPCR/ecofilter.P create mode 100644 src/libecoPCR/econame.P create mode 100644 src/libecoPCR/ecorank.P create mode 100644 src/libecoPCR/ecoseq.P create mode 100644 src/libecoPCR/ecotax.P create mode 100755 tools/ecoPCRFormat.py diff --git a/src/ecoPrimer b/src/ecoPrimer new file mode 100755 index 0000000000000000000000000000000000000000..1ccf5663a0f12775dfd89a09b147b62497ab0902 GIT binary patch literal 67832 zcmeFaeSB2awKqOVCNN-Nq9!%1pktkAkhesL5)x{L%#bs7kSIY=KnQsusUeBU323Aa-r~L160ennw}ABpvGonB)cSJ9h|sElR-Nbj-Fu%ilK|?y z&+qg3{qcA{O!hf@?X~vWYp=cb+H3D~c;@57pAB_5Qqmlbb*T=ABLlzNhd3NvNO^r7 zKMlV{iz(mUfOS%%NfL~%Z>OMYCmsBPu4CC!bC+QN|~1t-({^(~U#9)-l` z5F=%o{%neh1a4*HC5p%c`u(& zP48x_2UG#AMN49NL%Z~#)c&mlT_ebM!<-_$zZII0=9xU^^_Q~$VQM@sMOX3w7rd7ak_0h|4^py)=MlYYNmzQYU z+FEL>8*1B-?V=;sSDtYe60NU1lZBCT>SO9*+E<<@lX09k`^xhtO`3E?U-^`}x;iAK z66TkQaBcd^LwwTo@`|1A^8u+7e`goOwT^cen}0w9wCHN@JWcm|=b4MeWA;^8QfT_y zn%?_(1(40P%r?@G-0N@{l_(o6^otUI)r=_xJ*I=;j$MoA?*#EqR_B-v>iJr9fj3i& zhP@t5f6e?1GdI&ukot{~H^*;OxPp3*p*bIaZsw9qzrOeQd#FW6Eq?v!-p|h0U-0V( zdgDXJ97m0>V5Oy(ctwo?!(kdE*Rd;qKAKhP@KqM{1oc17(ew}X&$Q^YT_`=p(u{z^XN0{OAYy)eIWnTt?nXgP--A{xXW=X}yYN%8@Cv(d zkhL2Dv~I*2W=5yoz@)k~&;58-3M-QLlT$!>6e&j_j%in*5HijX8IzGQL1uj1C(iU9muPh5oAf)Me=0S2evaR`Xdrj`2g; z`V1TX8ykL;g!|?O^|iISC^+0-vngnl zc;9ztX79a?o!sJ{yRYj&IMXPfjAri;%tBUs-FZ9Scb_@85`SjRgJd$^cQ5D-Mo0A_ zqr5`Xi`^6EOj$l0gaRe*3B4j` zENBkeJ6qHLiT1iMT(18)S~NFDj>P4SRKl_xEt*Y783;p=bRT97y@%3ZvF^(fl>`d* z1@&E8$J$ItYCph=534?{;|fQ3^0%XtpA1FIa|-qq9Iy+lLup4a>de&Qy;{eP#hN}u z>)7MciruZTV9GwtSeq&6M*dq68Ak$|amBZzSx*)mE7&KS4Sbgt-ve6;{8W%}X+@v4 z#*DU{kp50cf7?74{oK`~RhyABAUBtk+s=Z6ijAN#`4#;&l-1%p&G|$O6%ASwL0SNycvzqjD?Pn z{tr+wCYkFmW)IGB=w&XW)zx(@@*~Y~dgG4%A12v%(IC-IuqB!*+8NZ3CnSOPPSK2s zexueA=z3%AIDgSHM|d7v483Xu(`a!HTa31c+4j}me6)B9TI>i#mt#Eiv(1^p{*vRJ z;P}n(HqBTC=^ZnteFyQqp2!;ky}73UUa0DC@1o+&g^??aQc=a^v9ke|EN=cQk|h3p zbH$7q{BLV|#8}5b_vxR|#Lw2&zq=P&7{biu>Fe}9MwP^c!S7M2SEYGkQuTYs$ywF! z%_l%ZNiHkJO(}1XA?HiKHzcN3zqf*OD^#Hpuz8$dalZifi&gm=mDZ~iO#(|$tCqS= z7=Vu;e28J~hcbVPA{{5cf(|8j-m0bSj~{|uU&O+GFM9m{ z=9iA3&FA5t^*Qhy1& z4v%Ox?Z-!c{Xy^TC>x9-L@pdAeY1BHGBo`iz0BuW#C5d^4<-c zQ1lnxtw_yuQEnzX<}k`7I>t-`B&5GZkI%_MNK=veQS1G{OgMk=`~3PdzT@xp#?LS4 z(VwT!_t}s!pXrmm@bkdH>q7EidJ85-w#1Mf0HFxsnA#(WgeDPhzC^#o@7mbqrX5Dy}6H>nrPD0<3)eaEwp4Kkl{PV{S$9BqHGxE*REGCuD z8kQJ+=8+H^{2Ax+%43+V*YDa3go2(VXAjd}XFnRnEH*oKpM*Ek_{Hv!51M$;@c?mOL$VO@v-BpT*&jm+vs560Meq;l_Mq-r~G4qfOm_vYVZp}U$aMwUm|6v2B69XEphTD&A zz*GWCZNO?9&_F=Y22|UCy9v1123%@RSW$LcldP;9(nZ4*}&i;Ab{qKLK7FaH|b?jes#Wpuq+lBfxJ1 zDr`V5OyQ$tHsEp_P)R_s4LHvRtRi5V4M?{E_Y!b{4S08vC6b2-SZ)KJvjLg)03?** zQ5$d$0nk3x=l9rvIs(qO0k_+LWd!8ffMy%8dLZCh8=w=AV`EGcfJwpvnCIE0(!oWN zhM!{ts)^1yUCsiZ-C(tLDFNJ_l7KgDKs^Cx+khu+fFP9s{Fe>5ff!jf#!qd)^#s_i zk-6Rm2>G06W7OM#Mq*^zfLS)6hJXaSm)U??0={8mjJ5%yf!Q{|X#)hQghcxnS|SlG zPjt=!8?b_PBmmtuV8cMb-8LYy7#OydGHTlR7^yBnCkZK=%$4MB5#uXvM1cPN8Mggr}|-Dt12-5hGT?oG8lr^u35U>mLX8zoQS02G&WG zpQ5Z;s}SH1e1hJ0xF5&O&bm$d$NGuh*Im7ba@N0&{&fu7uYcToB&YXd7oOt|JP$jO z9RKxIA~eP@()*}66JZZ`rw{ZVb%JI{|Erwr;Y1gpK7<|M=ncbN!2vpwe?pm#Po3e> z7z|ltFIqSmNjGQ8?PSWF05t)J8GKl`8UZVoT2a4pXfd0>U}*2!y8=g(D(Mn za(Yj=as`t+hCQL5t?%#ZSu2ti7k7bA&+=;ei;Mwb@q-XP2d)Rw$xFodyAFrHiMD3T znjc`zwRX)DQFH7{NW<6X^di^MzqK#@Eko|XOD*h6e;&`g?tNwhPlKkw6@H0RB=1QxC|(r*EAEuP+l1oq@YjKWFZMGoSp z;54BBb*6q4uD|}W99gBrl?_JZ3l@cjih(agzqon9mSY7hB5?0*hsHU~&?oVD|DB2W zY$C*lNT3R_A*ATjKM+Aj8F8$}-qE{SQ2%pKeJY499Jz{t=cin*I`&AhI3p*V^syh`=Qqa>|&cU;jJI zW=L19S%Uy-jDg04zC-??jV9=SWOI&E{wFAOv=7~YD5`lbsxwBM1@^l3g|RV{{><${ z#u}LO=`YRH4}~aX#oWO;0LDigL?ccWViZEQx63xE_$A`Ck5ZWv;#IO`02i|4=2C|F zTXY8I#|#cW)D8JC;?PasDX8y>KNBtals=C6E9fO61ei}Ve0qR$dq!8!+Q}i^6w>;l zFz-6hu=JCWM+7+!V>vHYby)s|^y660oA-V!n8A8CJB%R#W)N?3be{`cjOKWsc>%bf ztKib~0Df45y~<#(c@YHUFRJloto_eegX{~xUEgf2`fFPB23IJW?gOQ6mmv45IUlPj zPMO2BF)z;7dd;+*sP~f^QSZg9cPt8ScjAxv6ZjhQw}EgR^Mz(y7W)_AS`r7=nUdSH ziG@kmu{|GuVr`Hu_hY)^v&HTZHXhua>i`jP{;qhKdkxRa3XdV_{sv&?AJ|4?RSwWZ zf#1mjZ?V8=7Pw0m@K^;l%K{|S?O}mcED+6JE%TpY{x^}&z|LZTu((zmDAJw^Es6aS zvbXFj%!RNOLRp_jH%t|zvM*thur5NoVR}EMZYSDpuHv3T*I{gu=s965m5-R~W(%n< z0KQmIXCK0Py}OvqUd6%))#-gf5&H|G=uDW%Npw6kiC1dl-A}wS;(ZNx7=#Kh$2?Qu z9V6axGgIQ_+ITI*`!?}jMLQ8;S9l)tC9D&Dm~h3ZyAX^5j2N}qbt+3H^wl>9p?k5B zgk3W^iehl*#FVq!-T4`6fy;3RvN+>U;5--2KC(ne;O|HhlG36Jo#sq1#F2$yjQ$$O zmU#_W0?K0&^L(?mZ}sVi0Xm4`iBoEXmEk9h}!ZcrtFGQmvP$HEgx?02X>N zd|qdmqZ(m3v0n%ZIp(`pgFs9uV;L1%#}$ z>X{xP#Q+k-Dql7q2Q6$`xM7d56=kY@C&u&tUL{&ToHhIaHON`Ei~*kkWrci=TRdhx zKxm2{@R$q5?I<#PjktB_fl+2TZXv7KJz`ID9caS5bTVp;w5eiucjF%Q1R_x+-$ANFRj-s+tNn1J5i<8(UAZ(G{26BECY{ssucBNoM?h2H{U zO@Cd^LjRbBhpvN~K6)tU1N}93=Xs!N>KxH9Rk?_dv03vM?o66LjcG5T+sqnSC;|t3 z8ux0=AA%ZJ6ju9@7f*j;Js_=spox#;9!>mN2b1jYRV6pk{123|+x82JFnWr%HKQM- zvnAT*8ql^SplG-4Yj)elf{}2L=I9|<$XK3ju%-P0E4_NA+aY3yc{Y|8 zv7aV)_&Q`eluQ^iqy_JixKAz~fcpX#6tStOY15`T${2sjZEqL#;Cf?d!9jS=g+l+( zcUsh0ie=+&+X(LkH8Au9iC%&_&2vt0GALf?SiK*0J|Mw81ao0boA)7l9Z`R=0Xx9n zqDFf@2!AViE?6ST)R2rYjt^0DqS;r<{AT7)bCCRN!oCPgzDe%t*|umQmrlz41nZ5>;P=4g2sFh zA&pY3k%W&5-_-V3r28uTR`d~K0(4cJ=zjPlbUo0B%VZ-qAn85_T*UVxpnz_RmhAT6 z(L5$f?~)FS(~C7}M%RJ07YYl{qcze+g>z146^>q5ZP3DStEN`ll?jsF55wOM_AWZV0n1bM2D@{M+RIz5(0$R5@DdHpY3P zg?~oEK)=J>L75w?u%?s+u8{?Hu>kg^V1HR4nmtwK|AF~GLq7Fk13*$AG@~58(Z^a* zU-+`(70$?6{kd{r+3SP-8jcTXMf<~N7q>VgM;PJP_wOW*NOlyrI>U!)zxJbpU^>e) z{oGAzM_iz1O=)q93kM5yu8G&7asV_>!GZp>Y<+*-IdrXElGcRV=4(~S|2;<;;I`xFFK zT*in82`$t`ZI0=nc^3n+-fS*WY~Q;?MwtLg}rIu8IT z{704+CW_v!1;6J>ep3~{g(J!DF^gZv|Ayb=^98>@2ix856pP=7 z-usgLu3MjwZv-^VJ2BiK-+JWh>8-1Te6Pm?+h0OIZo3{NqJzB>YbO&^%uh4q*it&7J`eCIsot zVyeZ;B-1=TgHgBR=CPxs%7QTMnJ9g}C_NM~%!tpT6laxjV#;CpU!p4*{LR6PF%HF_ zXA~(~coExis`=#(s2<#KmiFmRuOGplae=ObYYY6CYMe9mrI`q#y1UpVL7Y!T!l`pg zNI#LZwJEIPDdroBBj*$|&YAE!G(*fd;4yi|c_O*ma8z1a`FDU6JZO&;!P5cWap`S7 zN+xy88YeNxHLeI{h(Lyq=f4=sV1$E*>KJf)+B}F=0Zu_-OMA_1pAm5s94yKYk3r?q zKcF*|gLMl$kgWJlU(qY!@o=$lKD5hG6mdkb;<8Rn3G2>vmpnQJb`Un(lzWqcZqbPWV*fmnQ*YE?aPn_-1CUb$Oi%- z)n!$`!eZT%5L-Q7+}Ja3_p!Av&)nl7^8rKP4eA%M04~s^S{L%oIj_z zRorIK`TIBA!XJzshQn@!N<>QSLDYZ&TN&?TI4Z}u*^h{yra#vek1R7*!3Bz3WUs|- z#lZ2%T{)KHVfi1%95B91!vRRP(FpBuiERzD3=1MS58XVslf0-GG!p&?4g^9-dw`Gb zOvXDdNy+4hWvB82a6T@GiKX6>?$tS z&HBGvqU^)4!xdiX`$G-V{%>qTj~77qM(r}Eqf{LIFJ(U0U%bo_`D-BMFxF@fFa(HW zBn))vi0HoU+jcI{(N$@PhIXSS8Y7$130OayQsfb72$a02VQ@x3L+UkYpoVf;LkNj) zn+LS@J+Ug##rVf*PxhBQ>M4SE%Bi7tuZ_F{eJy3f0W0%UiZbXb-|cq=%)Ucfd0vt z7o+pAbM%g1e+xww@HUhi^B0@}lUbP^|4fa}iLbaXb|VBcaQ^nq#Zgw@wP0PGhejX5 zS;L}3uk?ESFh*XC3=dnC*I%x6{4>R;zl(!j`tEQ=fclGp@7F)}jX8{(P@k+ngwf}U zOw{6kq!exe?*aV?cvReAjqmP^LB_{+rhr!1N4k-%eu&fyL3?17NjQ&NX&xFO%L0K= zNbmJ6iY-kngTar!TYI*Ddeonr0)pZ9Vsp?)+y0gzADesM;^gLwmd7#Lb8tUO-=So6 z6-|Z6*<-T^0lT{bAcGpg#3;qMZu&7bjq>Yn`o|m!MV;sRql@D)KdZCPTk9rjP(`$q zgDlbit3W|dY_t%)L&@8R%?F-8r)Mxv>)Kc9-n?ffH6O1xxHtFGLCB;KJo?^YM>`Is zkf94Wv=GwSyu;V=S&I9P9@h?c*ZXisN?m_A;!in~AH(INi$9^>tQ^AFcE zy4i7PsMc{J&E5GS^2BqR7T@dcx)*ptT)v_`?hOwR2m_DTD{8!cjDk}3N2#XAAQWMP z8@(DMWadkVWAqPGYNcbtE=4@MA*D=x-PP0!S0B)E9roAH6Y-YWzIW{uExFfXG0= z4;Av${)1J)VI#KHAxWI?3VSE}$Goq<9LGsD3Y4xWxD_<|{bO*B`zdg|7$H(`MhCV9 zQrsIhLuy+5DJZCW!=;o64qA4Se~!{mMFXGy)tAE8>v2^iSctn}uaS21Mt5wst}VxBcD@Jhg! z>!?g(FC_Mt=~u1yaH{z!To+%|KPn}_1=WhfjY+->E%&SP&Jk!y*g)8SAuI8?3iN-JIUoAE6d7)-eXt zqJ__kWZJ_>9_>dznX8JKKjI+?{=EeMnBZ3lUI6eW?1Kc^r_m8-A<_R)a4gmUvDo$k z65oa%;dO<6Kr=FqYZvrt7dl_iih42R+>i7OeE&i}hEmbVc+n{91!j(qQmYV??A?ndph^KyS|Uo@M2+J@YVt%3u@PtCrmf*@E7fL zZ$`0jPM`}B|Bu~WnKTTB)9Z^nF;`{n#Acy;!&uOWW}GXW3>kuY?odZ86*!@2MLaw& zb}JZ4*f;m%Lm<0fHe!jDvaiGRXMVR$9qB-yr2#10}qjmL}evA z<1n~AcypuZ&+g9OgJhsM;EX)P(~r>+b0s;@PNKN-Kg9b19sBSmRR6ugei0{xbAu=z zbo-Bk0}Exk`;n~t&I2ol_*njc;$=tZf-Eoj=^f`y1yk{y1;~^2HCb2(pI2Ez+Y^V!{sC<4*SsCCI1MK$iBsgWqI8N>uFpkFh^rQZ;?YNQ@Um`?Tjwr~;JEhoO?DPh?tOHgt(TvMvnD6)tP@#f-p=fD5bU_~?dl&IlDi|&&PMlDgdJ$lR zX#)M;Bc!ek>n(ad@|vdam6RmEnsH9Tpmo!%X&3C!E*zp2?PwhWiMk)z3xou}bmGy4 zyHQZ?72}i--x7n7$DA~YuY9`9^Pms#(;p?d@NjKx8iT04@1#LknK%cNTz~1hc-l56 z7+#A2U}-137udn16%qaG7dcZOV#S!*<$(>H%#a5*=!YSlv#{33 zT@goLS&mGyWWq%W-pwf!A%}2lD2k~(2)|87&%Q$?J!amFDGjbbhL}{aexuHp=?9OPmWqtgkGSHW}J~&tD)s9TLa;Q)O@3DXw6mIM;d&(Etig52)i<}xs0EX<+YM&TTWpRbQ#ZGrg%>!#=^^weu+5eU)A<^3%fa;RXGzB6rMP8=I0 z;yr;8caVHEd!ir>Nje}EdEPOD6~N|FjsaKE`^!E0%TiP2U;QYii073=BY#IxywEv@ zHUdZW;EMa{9KqShFb`y)4lJ;+P7v?b)$rZAMsF#1Gc>+)RzW8Ty0{MY585Ba0u@Tl zH+CNcOf%frh;rd2LmbQK33jtQu2RlFMei*?Q$Gp|*=sm&yHd~Uovu4?yQ(XrH>>_C`%?t*(DpWnK)*4^x9u0m#wzvfSExNgbxF-~egga{ZE=A!nXaRuHNK_?>2(S%}P{&^80EPM?~l7`;> zGDBU^g`HX-S#tE1%F$OUM_;KLeWfk}-JSUu6*%Y8pWMGV+kyR^2A?r=9Nv*Z^yoDN z%`*5}x9D0Z238b9Ked;sehf7*hyyaAg;^`r0M@5qX%(Wo&3YtTba>5Hw0c{MlO`2q<8PHE;3q4vs96dpqyLmGr70Fa6<<>P1DV4V+ba28|cl(OO53 z6EjBS?_wU>=j*1u6I{cw#Cuqrg5dDHo)+)*4RdZPdKwdFH2rQ69z6bxa;Ij@@^Gp9 z^qT3IOIIzSlI*km6Za!$`nG)?)T7h4mwbK2P`GUVXzBJ~b&n5k4aa-oPdGDg9|Hfu zUo^|PcK_h>f63!PEU@+G@XjY=lVUl|g*7AblSN1aufJbcI>lTzkp}`Ay^}GJ#_jhp za$<@x#kHIVV`wqPP-%%7K)Lrr?s11gM&T&14STd0TKQK@+kD+hxTqoyQsZ51dQlh; zfCteyqA*yM3X>}7Y zc;Va*HlIX_16d-S$JCgf$JbQFVB}=8Wks+|e8vSC$GV05foR zH}A08jAnJ=&Ow9Z$1)vDbE|(Sl02|eSzM9h?ihoaoy@`Rqe7ZrE(9wze*Z$gXBA8l zE8;vHoEb-wNb{L$MrOhAfWN-0Umb8+n_}OqQlk;PAxAM<9#S<^_if4)$NF z;Y~OSJjghy?!06G3SGtB*f`kf^cO*=+?|i3q^~2MvK#{8LVkLPafY+mo{q<4$Dspn zM=4&o?UZ*gmfh)Ww4d()Z1k=}wiewi`U8qGx0bMydFF&mKp%(25awnze!V2kecN@g zAjru?r-KGAch?eRX?TfLa*i(|h$Hn*<1D>09jxqjNq)O3gz;G?kgWLyDsRlozzcuE z9}qjSdoY-vtD)#=kzT^xK(W8mbuc`^mbxM*o2Ka}%fIN45tHJlu(M!EIEzn##4%SC z9Jaex;#<-rS6Ya#PQDIqzfhXLVXTa1@g!NE0q)JW@-Vh~A+(&1k5Z=V86NkB*#g7v zYz()7ovj&ZeuIYqaHi^oRG|nS%M)}{ zU<5!AP7ox-PV*TH^8-fhIHM)QoW*@o;O1e{V}TsJ)ldn2TI+*`>wh(_%+W{ja7d** zfOQkJ%c^(9MU=dI!*im4DE*2M{iU6FPM*)P-0I<3^EVLh0rh#TCJ#jBF@l^)+B6im zz5uXh$RS`I@K{rdK8hAz17^BkM5mfP&tb~~lKd`v7<4d*7Kq);N?Fp+lCKcRpndP( z@E+$*Eo}j;B4$kUR$$B16WLx_K8xl5g7Rjagu%*YyCm!cl5PCYCfW3Q#;OG^? zUL)QF0Yi)Me&i)EwqW-lfUt23j0#ND#jxnbW5_KnI`ysaeZUKPAX*B+JdZ%U4-Z0-Vm<`i2I(j49Mw91hLHia-WNHagM=#W zgi7~BGNu0J$BKoERx00uvNA79_-$7GK=&X{fbmAV^m{B9lI)TP1}Q5qb`Nu-*-hV~ ztP1}VN%sRNE>0iAMy|ts3Vx<%KjkHdXzV;7L0Y9iER={ViP*-1$Da`}e1RtW>k_t* zun1t7WL)M85>DKYrin_Y5xzpgGtJ-I_=R49pM3zucx1GDCF$P@co2KN$nnIk`ABfT zRTBCDi8x1k68B4hWOIKO7NIy=6Z;|3WOr;6_6NiVE5yz~d&syvWGr=I5+B8#0sVPs z9!{%@;5NQs;m7QpCEQaiic+2}!urTPLzXQGAkb8B*c^o^!C!=33(U=<;1x`vB;6a% zMIz=$9N#K&Z`g`QDmCpZHxi&(unCyodH@hfIV zM;!}Y0E?Q&@IukE?hP~)KBMGem>#%yzDHrQ;02k!j*nA=(X2iAI8LW#3^&q3da7P5 zyfHKeQ|PaO2gM72)0bBAutvq^Rf>sJFOH-KyVAl#Lc@CS?H4S+IzGlO2G59Ma{=s4 z!Dn>N$G1;be8Z1sjdI^s2hbq>M$2;2(uyEA8O_Be1?rvR>-bAth{=81I~WvFDnCWV z*q|ZL|Myq(7f z)$&KQc~C8auF374*mG!{Y>twPUq9g!Vc(gCa}WBfGsX|-B;pV3udo|Es(k*=DFCBG zWBEwo55#xJZp9s%fjt0x>_yhax5Z;WKtA?Q#d&(Xw1oHtgFXCOPL5HIK*a}r<#OF9 zzgX5+J_=a)G?@o~#nD}QFGWXN5oj<^wTCVA7m;%;-C;7%DB(bM{tJ;YuAO-FZG*E7 z)A={9hJN%UMx1yDZ^R*N1fa+;V!#~eullx4LP4_yMf`d#7H)JzPg&wbSj%zIFIn@gx3(Rzo_D64ACk7451MHS!1pTFFj5-=3>e%cj+0M zaOx3}L&O++idp~UC-m~n$Z+*-IRJ~xv(R54>_9*dn<9F-hXD)BW$CHdL&vJ}J;6bC zSTgb^>cW51S4BSO-IzBq;K2LKmuD@I2eS^WB}T@ znP<>$0?^D5%iW`G&f{#)9%amZlQ9=@10h$N2Ek@-JK7qAaZ{+L90FChaPsIM7G0Pd>*M6P8H-w_S z=J_dmpFM%#_!dkQ>&G0$sR=~x#wCsK=#_CSN7v^0qFF9|KMna_T#Rqs#;=i?+6 zP7zE`L8&z_*KLC22kV`I3v9d-HmBh$^UCc3N}DH-gD|_ zVw(j~qoNA`I{Zw ziBwswdNe1Wlij8Tkn}m!PA6hKWmYg#VQrBB;E7J-%l4 zh7|Lhf_)ec>PYuF*!zX-aybCdy6H|Rj`KF0u=@r8&;GOM#aq%lPNc3qSA^5p0s3Bh zqe=CHcpoGCGQsPJp-jM{qM8PG=3i|UGVxtH4ZUK*; z$#*hy^iSm})L+4r3pzW)Uj2xk*H`Lfr*dbpPeyaiPOC^+CiF_~azK-Kda(ni(jdu( zLJG8S4HHauG`R%_!N;#)l~Ww%>)^G2kMCMxTScV$Qyzc`w-+4d$MZf}Q%QZ`i}^Sp zMEjI@6~HYipM^x)r#sm0X!h~RqQ(s73kwhXw2E6-;^T_GqW2P(i>!4%)E zn}br8AI2eNpWhMhkNWgab!>3rB~rP`()%`cxU6$vI4>nme?bp5v*}!L(A9&(lkUz+ zxG(q+*V=I*ZWw&w$uFrd@V`R(hlqR#3kp}%)qjD6$iL!+sD+O3O^kpeM8FT_{akk1 zKfWRA#tJh0lJYXAsE)Hb?HQ{`S!U#S5XBz&Iq0H;Op?3EIa_qm6}zZT2Q_`6Bl3N0 zhIG9#Q~z+yf@FSz_@dxR{nISTwjz=Jb8M&RpSd!>k@;5t90Z8{v)kn>dRO#Mfh=_s z5*+;!U4&qe`JX5ldk8)xRHz0o_od()M)W8LUYPDNt)Nkf^Bh0HHU(Ig_DG!1=zYrx zW9Kyd-*OfEr@GWR4(UJg{Dy+#9LMEa3XX9MHLk*z#07jGKP*&8 zcPDLNY6iAahqk(wXGoJJ4v*{vZc(|b^&czn4)csmt*AV+^{o}J8Z&Z=%5z#@S@EaD z{3-3V`0gcNi&?>m@_tB&8?Bk4rnSebom{^l|BtHowbVKb6$_%e>|TUs!~ ze%yoC_vN;z)>kT=5~Qwi&`z~&!`&su4KRqYy9q=`l0S$qKVee^O7a;J@&4p?4@#K# zfh<(5|2BJqSchz=ku@&F{-ih+X>C% zUqOli5JJ&E$y)|r_sd&;k?cu%TZ-Gg^0oxGyX5T@-1f-Z$+&$?-sa<$Ye>)?M`~Mf z&p^5d_xH1CLPz%jbagLD-3p!l4L=S-wLZj3b}KD>}-PB@2*LEoMW zUrq~zXj~(_B-6N z!25{#wwmQaAAa;Wnr`VASka7yF6vng>8kLsF&|tmbj6-Yj9V+d!EY|}Yn)T4mil8N zCSDL%UqE3vpxrzZ!ezI2k|M%B>=(qK+?`E`X&^4N8PB{Jl4l?D;(d>}Q}GRci5cHI zb$6x#Ni*&eCHb)zoLM;xzdY4eXI8@^(_Qi2v~g*;$-&9-DY(UUmdSH}Jx<00(Cz0- ziFlaw9&&aIZR|7G{Q-yNxZKAZ%?uOs>>|JF8z?v!C^#Uks2_t)ZS^2B62vA+aLj%L zM5eG}!^vX>Rajo*JZ9(9s4iqYEZSq+GkoCMukQfqiB`Y1Kl=eg3zx^zH&ImMH6 zT3MejzB5n+Pr-fLr_cm(>})>5?fQG1il-t3hF;^~=EV`m-ej1N${ClSq6bBZkp7?; zw-|6Z%kTHryF1SWCEyTWGvqfS8Hs2of%};W5%Cp(nbFl0(`I12ogSczs*NE*9--xV zyMklBjb z4kPb!KQH3ePw2<_BF27Ae**i?7*H<)hq7dfVP%TP$E0zC5NK949p}CvMA(l`SwPF0u2V#9?lHlihd{U$713-qg9?ykE)iH5bFY85oTrln2y z;7GNm`!b{DDKOv({}SrVx?s4&Me;U$83e&RDy*2FPtnoEr1t9s)@B zZ3}>ZtjhA>1ah|KG+mExqQ$O7Zz;D`D>^P*M0NO19131XStjFLNWsY;pB`BYhU~EdS7$7Gwz(C)@>CG^2T$q1lBaOBgczvFYy z!nC{Qa$mPvU4J9pRL@C+)$NkviUk`#2C!f2bqYkolh6rNId;aU+65FZaf zmYg=>CmR)cT?g8y;bayz5HCYCjjDYgk-}Gos$U0(cj5tBcL;_HFJ_tZGsrQRsh^@rJ89*bY(V%3c=i`Zbn}@XjA^OC4 zg;8yT@T1=1{4-R`XG}(aO~;Ej?`YNdwo(uF74X#q9D6had*Rw)3d7!Lq=v(9^LC67 zMqYX`KCRF16e6u~g1d*R{XlZvDTI%lPsa>ra&R_ON+zXEu-iv%vT6TtP zZL5)f8?w;aFyECCYyHpMUGt#=v0B(9OP&E7yURmny#Gb?)&f#dz4d|@_gLK{k^SBL zFyGz7Sih7!;Oa~FsJs3Xso?~EnpFX_JrbrHLv^7)Mw z@CBy*2}R{0Va3dql9{V)W?ugBoH*r*EpeH8%=QWq&02)$rfNL~#C) z{D#{Vua6PO-T5~DN?t#qpA#+oH{?^^EyKtLc;(z;q;Ehr-%MkuN3eP~|A&@7P9&~? ze#;b}KBol#@rX7^`T5_qB-ww`R!!UWP29u&BT@GMKiM?2>wI#Ct1o?oy5rKG24Gkc zey7C0-NqjUeDop+yjm*oLEuSs7u&+QR_J)PZ06&Du$e!GJv7qq#}mil-AH5E&|q0M zc0!2chpKFYD7#vf4YTYt&`q8nzoX{I8vt~K$H_VCUP7Pp(A^`M#EU zWC(=8|CGan*(f9Os*!&5TdX0wT~_q(NbsJtI3VNGXOx=5P#=tOUXn4^qF6hK*Tk_V zaeDiUF<3esyfg!c%d7DbSG?DoedHK0^&@x<++2!U*b=% z_A+c$cvhge))`*uegtnfA1p~*;l_DZ=~DDO+kfGxTlE*k+o9n;ya4PQdpW~~c)^|c z_=5fRw;%@)6r>`*V|<;a1gpB!9Gf>voyjNXp}%Cge|^IK8^dD-5Dl1>{mJ*)K9WaP ztdLH93SGRpPRb+CXoXxta7hq?SpopXQ8`?S1Dah)oj_W_6r%BA?c;=S^6~u+k9Yv9 z`BbL`^89ps2X+fFGZ#Iv=7Rv-cz&&Srm@oFJN~@;d#{2BHrS*j<=;TD&*|=b4FJ4- zGZZzUO%6wi6K5JAnLO-x8O~c#q2Gr`Z1uTs|1RJfPK#GCYI7L5`Da`WrD||tW9w?y zv+rD3>GeNr`Y$+&Pv!bDr1ZhX;V}AbH{Ka5#@pX(HM}P)&Y%1BJ-)G2J{;`wqaM6G zhP5@1)Vk0?=e@}g7w$u7H@-6jcz?HKT-hPKrWB0#!IyP+QSR(M4ZVZ~8O5A5)$1!@g3Pc{C`7spuxuV=%$U{d7|r}ym;mCi0Z9`@-`^t z#cXQnE?xtb+U6hjG1YZJptuV1Hi0`>Whv_cD!WUlYy`r7OjIl+W9x3(8#r$NvXqOU z9r*=UqvY_E?e`C8!dxfMWOjO08=!FkBOtd0Ev=R|1m_J0G|R`t_4l?h)wBE6^DvXCg@+KMDQoyagc9)~<8VaG`G_aT7Ee zJTW(5@!a#Vg%i)cK>WQ>{LK}AFBX5th`(dS-|^z_MDdsVKk?j2;%}k&d%5^qB>onQ zzthCuE5u))_**9a2E^ZT@i!>`ep~#VDgMq9f3FsQXN$jc#9!`9#dEI{f9H$83&r0> z_=^kwD`NzEDzOFM!<-NuP5xpP^8OFZ_sjc_ct1tnFXR0(OXU5tER!$qNAun*?}NP0 zk@r95eTKYW&wGcw{|oOw*eK-C%X?GaKgIi3aGzV{$-QoT?y~W@HJ)5;YVORbxwAdF z*E=rEt+{ZVC%0|5!;@PxH8=lK`3DdDH{6kXSX1yD zaB1Ce$C7eKWplWy(bH1Z(CVpfjx>cmQ-MF+QCr>I(%P`Bwsk_aqpYf_sX6TNH8wU^ zSA}am&8;=Ht+h1*sm(EbPScG|%`2Kb;i~rLCQoDS^4i9!o{MwaE*|a(w6-?4dXU@F zT3gr9?x~U_8k(CNt+_z~KQ$6@nwrwY9BnAR!v>Xj|FT+|t(22EOYWnra%F zmZCB;-sY)ls_`tVt!j(3)-pR>U+cLkQq|ZHUg>GB8&GOVRcm!~O|7T3b}0yB`Go&V ztF3Bj?NrZ%iHUmWH@AcVZd(ZPs@tieEX$9t@!*>3neShS|JQy=Ra-3{!p)v#RpIJ- zQFm=S{=?ER5vMq`l>dfFR4|U=ftmJe~%05Cp*1lwC71+}zyQ=Ba6D1xL-TD^c1iCkrI#R=3uo z&%sN5Yb|(bXsq>wSGFKSHQmBvI-#wm9j&fzZVFd5G!duP(^h*^q_(NL7K$pa0p-I3 z%QrMh(Xewtq`J1X$?7N0<2sV3Gv^GHfK^gz)=qP9soq=Q=<69N~*DS1U3%52@hb_Is zn8v_s7QH*5K@HVOEgD}f@yjrzY06PVFstOibEcojFR;-w?w6(UW8T~e)rL}5F z!-TrliBO8_gcY4gJ^#`d)CitXZk@EZwu=It5JR)JW)VczPy;1Na#s?rOl^C0W2ENP zrRuBa_-EHlZJZjKT06C2YU|XSrcRpLHnn{!PJv%jiwS~m2uByKs%>8;Oqk%9b)(V* ztC+0-$;E~{W~=Fx+|@vY^wq@Wx4aIFe9WC~;fAmnk1!> zJtkO5782ehp(kG)gDfJH7T%wn$yn8C=t#e^rU8sJRfj=Zq6&tG2b*eY+bN6{@Fm-l zM+Mx3;U14Nl5(giMAg=lpqy_#ZPExxXDiu{9ggx9a=ve)n$as067zV*S^!X{Tx^La z<2!~2n&29eR}0=IjAC8n=9}SlRfWUwQfzaMu2lWu@d!kWkpvzDIFUlMuMrN%47ybV zth#D1oXE!7Q-Mzl0F_8}7~Zaft@gFRCB!(S$KG1o0xu3eTP~Ldv`h|ICZRVc%fa$e zEWQ)+KdJxiwhbtQ=2V3vs7-q4$>R+5!vqRn3o>t{yMV0Zg780sv*v^w8r!h)fO`rB zncZ+RCK$O0fj@lJoKVOfO|k^~A4PMfGg9a5cpOs>YKi zBp~!|es0Y|?4LVkHFB{sfEEY<3$0MRipdq3LgTCKhJpAg=ndk_qSZ*5)Oe4cdP4Nr8Zcu1 z^0W;?90%iBJ|v?f{Z5ALr~Bc$Qf%SwSE8r z72L7;OTy;ltPFdicfMSCIY&`HPg5Qqhgclf2p0f-%*pOcmA9s+B<>Q}c6|w}$*xC> zmES&Ka!so5wB;?@mK%Xlise0wHD)3hdD>Ibu5F|B$J~~SDc*Ls><+M^AB-Ca=La^} z>YO$@R?;sJu@kWj1zh?@x#J7+;W5Bdkk?6k(CmVjBo;7 z!ZDCSwH#Ox)PsOe*Cyp!jZo$-TvdIeT5Q5#w=^RJg2`W49vS>PNgsv}pGCPP7*a!p zd1x_1cxurKRCv!KA_$+a3_f>l>vFWfLXaYBMYyO{Qj<|ip&-M*guJBYss2dP&!iFh zIX!}?TZGnYaDPhYRzR_EZ>?H+n*I~RE4i2uS`xODqEu2*&T%!X3$v9lkb* zMZ|yPN{d4To`-V%#z@!C#;je|5|$hd=3BCoutSXvOWMLWHlT%qF}R%Q>E%_85t!&i zj;+s0@;gmY&vFq9bubhjhEskD*@-AQVJAtIzKwu3V!;oT%}ASCP8}Dq$pY^C(={$lE5UVqI4ol+1YhhWT_5Hx%nDLN_%EXX2ww0ChQiD zLOy@Dhq0lBt4o!$l5w~x!S95E&$R$fAUj58I&rkXF|ws2Rpqlhl_8G@3UJwqVU>5X ze5qZ2G9K^3uU?fuOtQS3q&PBjW2&qnXG5BkMscL4gA^RxNdFUlo)xm5x0uOG;e9{V znU1HCIp0f@Hy#PdlYHcF7_u=v>U0WjrpTnC1IJ?gK;o-I+Z5fRLHtBhof(3zq~noE zUPt-{d=$Y60*;aS_~T%^!oc0yF8OW4dTg$51eOCUQnrM4cNTOX58U zy!w?A?{|0>m%#geDtJB*&m&7oV#5%!EH2994d5MBc>OrSDlR1tSxJt3vSdLKd@l_g z2r)n=mOMl|)}?}amLhXHy=l2!^2vS|mn4z-MR@fJ&tlU;r!G?8<|%lt1yCtr3-Gh> zZywY?&==8|9MwM^PAE>ITO8!`F8uD-CErJJFD{`az~-r>k@*|aq?&zU`zyXH>EKY~ zfNK<<$ZP56z}b#`1-HhJMTB~=8b6PM|Iz{oJ=l=mF+}KLG;O2PiIdS_`Fnx{@gBx+ zi^A(0hzErcdKYy<0og|*E54uly)@`N?%%~P-;nKDJ`m5@k&gdng4IM*H=_7yZNHZmWj{H*_=WJ(u=mO9DrWMM{g-JO3epVY*P_bdwA+A-ZQ*q~ zJ+orL@GdknuV*dONg0~=4;KiC^-$CN(HBu{czD$Ne*OvaC}qu6ifb)KbAN{%wg z=}6m zNGe(fMQ+Y;y1zH!j0O$u-O|A5aL;N|>S*7s%cDnb7) z;N7k8o&?Y0vgR`}hz8APQr7u0QL}nW$51)fbD}%F-J(3%$BsKCpFb0o;g1+C8`Cy8 z<$Rrv0hu(NoYoGKf6}W^^zIwXzaaAe9sicy-LysW-#vhTPG{0@@oa<;aAhg@V0-CD zKKXY2MAA8;%2|GuQ^}pp8R?%$w&11`c!@U!3-4uDGB1;}ex_+K9crxbY&?pM;~Z?VF&0}L#Ti?U_C6$)NH zNDj6)Z3)B!PgKlIdGI8JiL%BWWgwmQ3OO%r23_qQ+1~HqUR<(6GBJ-xyEw?ck=G~j zTc_~mODy{@;qT(tqu>D>B5w$%JwK}87Y%}QywILI?v;Gm_=>NNROi`%j`ZLUFVb6p z@*Y(_TjVDGoKEhO6|PI+ea+TedJw`LU{?q~E(14k?8N{jUL}4<6`pSZ9(^cbk2qlD zU|Gzigp~J2;JJPw+x17>i%YdjCejZOCdebftZNs3cPqTf(oDeqiN4L00|u+9llxzi z6e}uOIAM8DZ)Ye^{#t%1`K-cy@zS(Lv*#R1s@Hv+ld_+obR=zCA!!><6oRlzxyk}u+af}e$N zsi#$j@KIpVs(-u%P$}UhfLr)h+|DXSIPH<+cT#>n3y{>0s{zkf@C6oJrNqAr@D>I4 z41%+tA4`J&y9HPMZ2J=%G#y*XHDiD}7-%&0NnF=RtJ*97EqGw8T}FYCL9 z?C}y3u}4;AQ#sq>ojSNIWuuhcO^drR=wyl8z!ug}~YA>SJB}wfD;U&cc22 zMLCqjFPJvIsbitoW=b77p|43rmN(r#3u%Z6#+dpUCJYbw%y(*>HZqe;u<6m zwu|x2%)PQ4ZG|kS+C>M6(O27FP<^S!Bre*kYhhC#`hF?onzNWKPnLy zU3`kTD0$VMka$0_@f4pzCjSF{yE{$j8)-g>-zI$Bl-G@e}|4R2p6*;hWWSsY=rh{7nk~PYU09w(u7d8JC5>SUe@_ zKdSKGQux-hh2N4?p7kd`v;M`{S7zC`!nd9+d~D5#iwW_QpNU_QDe=!hIEd}Ho>`vl zKPs>j{3kyXzZ8e?SoVB{Z#`T1rM$t#g!Lys6Mr27i-e6;_|`M?zD)fs*llFl5`}L) zTlhVI@!~V<@8JV3KC}M(EO}p{@U3UUS-*82kk7I&jrD-^wn5LE@J#(1 zGD!a_Cd&GsSM^)ZR{iS~O{@OoXY#*a;lHi$t!E3rVi11vGx0t7vi=VhzV&S3_YA^M zekT4Qh3~>y2I`OXY~ddogrEFO{2qM3gJr*=@U3SHzhw}9@-y-CFO~Qc6u$Lr;a3d8 zPktu;I)z`R@U3SHfAJvvDekQ&LAB|zz8x_9wY~gPmgrEFO z{QU}lmBP24zbyW`$+CXkYQK8^viSM<-!fQsi^Bgu&7FU6rPW==Z-G)#?51j46xzA! zN*B7#-c6R>MM@{Ig%!Jm%x0l3Xy2QkOW17gB{yLgO53y@Yg>ic`bP&x>%xhJvM)A5Hn8Eg)n-J1U(Tw05SpQpF+Ymq&&LpQ83oF) z=fs=E-w|&S|46)5d^X-9Q(qCEN5(Nfc7KfTd~uh!B0fj_%i^;5o#HKGrYUV-`VpSC zFCWVI4lvu7*JSO>55eah5#-5kSyAibH#)Bc>^A6TC<^~W1ge-`eu%)fcP`mS4| z{#@zJ)~ESp`ctO;Md`OlZ?@j=f5_CEFU9e!Psdar`o(kWtRL^e@o9aT0(ZLqEboo^ zdjR2~{T4fYWo}?``ch{8N>|0OZ%A*pejwAQO#Q&GM*Y7?Z?=9ZSZ~=+nfmg(V%TZu z2QmF->-{|rndO`ED0gbf@854kd};apBDhoemGFTSqnVU_GxxE6b}4=`?WfH2cS-*- z>CNlam!-c)dh>eq73u#{db9NlS^86E`j@4DSbDSdOPM}p>KEP;%kL@a&DP`c2@9F! zXMP^X^ZNMbd!v3uydviD>G{pu&(fy$^ScS$>HIEU9^)TEIO~_0@vrCnE=gaL-faDV zz854je<`zk%2!7FbJClwuS3Vh`jn|ZF8y87o2{<~bxWTz^()eUMtZaL1DQT$>ig85 zd_j7%^_5JYGWB)oAC%r~eQK{CJ5#29QTiq6&DQ&SL^Ac}mvB7q?|logY`!DDUiR;bF+v794+Uwiv!JY2c)vKeuLkMSmGdsQYth`cYewUD!>MQ-x{vFbr zt@rnWWR{otk8nK8tGW>NpCzMws_QJDKgaQDd;1W$)AhInFP*gg)31x=V|M!d{kGGW z@>=ad8S$m|w}Ly_KYmHH|6bWQ+y1|0<(u+a_CJ!@zf<;C_2&*n``?oNC9&=MyCCP^ z{4$Q``ItGu`N;2oKg`ne>YwUd9+fRIJ^K*O`f7H1y10SG=}DROt?z;ucD?jw>j$E| zsZW{u7h!GV`-2{gcw0t)EYhXL+Pdef7c^*Nf7dt?$b8DN|pU{uSxX)|a_~g+H{PGWEyb z8pB?LaWU2>Gmq!_nCAu-{!pJX^_929uydq0^LXn0|8>@zFTnAEdo-+Icc z@4Zp~lJsWl{e2vn<(+a`-}C%`7xAU}|1WT-{FiYMxeJS|1;oD=YL-1zvT__{F`mRFSDOA z)4weJcInO5r}^diOPTtvv3PDTmELUq@yve8q5PzOm(wq{-rrr5X+Pz({PO$%yAfYn zena3+^{0GsO#eNyZ?=8^|Dx@uyq5j@Gy7jYf&DrB!z|@OaS_Meb_v+ehB1B>ma z%=%NhA%?vmz1jLI#aOJb28v95Rr;5uH(Q_9SLQ!u>Q|(HMS8RK^~4_aDF^$%9@FxD z+ZS8^^-Q1gTIFBDfB&TC|7>ul`ak#fX#WO;hx#YB{pHMl$|3!iM*WcVRk8JTZeZb0 z$bZ7rFTEp%-6Xx4hi%5{|4X&rd@GJW|BO=UnNLJL!`CX0kK*{WJboA4>3XRAMvU(T zrN`{_lwpU9(^C!R*l&PQ7@i#dAnZBCoQ>K1d$KNEq+4jqsK4t1l)tLU< zr8ir@km*yVenI-*l-_Lpe5OyC`hlTn|4!-6*3V`7l&N2m{`aIeTc6tN$Ig_guMbE2 zpOfBfz5j2HOuhMwIG)ct_4}iq;c0tx8u-oF`5fMZG3;FNints6S_F9fqfcC}#N!9W zZm$oD-QFVELGS+Sjbislr^Ft=J!(dy$n?BVTor#Q#2(*$li1_C7m7We*-K8w8~Vh4 z|FB=|_YeF(Hy-cj*P~{g-#f*AUwoI??~6Yz_WR<`i~YX%ezD&dFN!@L{g~L}ug{8I zzg`x5Jn)BNzi&Jn6^`kjQ+m$>Z^AKd&)z0>`?y=|_I9t>?f0PA_p?Im>+5E*pMP%` z`+59hVn1K=e^EE#eER>ZIOhr3|7Uf#jvp9`_2o~*z8)9FZePA8c6;@NS?&2hh%4f6 zi+z9np4jd0>8Ho?a{IkW?Dq8M9j^QJRx;9GQhnYHK3$`ADE87m9q;$6qhh}wxJ~Tw zU`{s0^X2~RU1Gn#zgz6_sXsMukLC5S*xwU9E%x_G-xm9Q)PGyQH|k%9Ynk@_zG$=9 z_i?w2{r%4;#QvV=Gh)AAyw~A7qP<12zn^(r z?C)8g7yJ91?~47s%IU9;`StVH&xx0NV*Fdhe!sfg;mY3?;*xk+?DxSR6#Mz=x5XY` z`vbAx_k5m=`sDvN{4<67{rcnPu9&_T#s0qH-^I&1pQoLv`t+0W_)TIzufIt=KNiC; z5c~biPO-oq?5fOvT{ z#{Zbu&tFgYc)VAp{pZF0zH`OLt31v^S+lx`1_eRiv7Lk7V*6MGvHU1UWHaQE^iN9YVzKU_h-i+%y>BCn=-Cv zd@SSlXZ(?j@5uO*8GkC{yEFc5#`k6Xg^a(N@sk-poAFD5Tjlq17XBX@|0v@#&uTv( zug&-k8NWH>Eg4rbzBuE)jIYf2nvAcD%u6*aUqVgcJ^lf#U&i_%)`zhE4c4zvLGyM|d9V z3s{%2{uNgCBmNre|A+LjV9KS@TPH`0#?0(+F^mc3F++7hFISlExhO)=IA+=}CTq3B zv-MKJIrrhWi0%va1xE%2W(Xdg!kpwqoFO0cR2Pkj@tIPA>BFZEV|Ma_Q?z3zCulE5 zj?_8z@v5Di_BVuv33|g8Cf`L8ieSFz?%BR;2X@cGZOYV4!8z!QiP2--y}NhzmI_Xn zA124|?%m$Ay`6h6YUdpnw{x!FwX0OX@f!RlAwi?a)aY1uZ_h5MntP)wg6^K(J9z9! ztr)p+cIt3(Buqja_e|CfHQ9-d&it6T-iL|S!xYjCg$FKrPwMXN-HBoeWvv}O7sdGDgN~Cp z_x4Y*w4hPn(>U2C(yhV&T*XNOBg|F2y(eP~3#x8Qf|sN-vHI=X>ced3x~)VG0I z%+-Gts^5Op$ccBtwW2n!MUwWz^~7z>MD2&O#py;92jQT=@yGm9-Q0shQ~?$R-3113 z1x0b>#$xQ)*a*A@;er_o)dda=2FGqbPD78mbFxr7?ulYlQc;3tGXD?%}07GV?2N zX@Eoq=YpZGa{uHQy|C=LaR}!@hsIqorP!qsz}A(x(we(h#ddYL2*yUEAC(Z>wNrGX z(3Ef^6i&QS=zTJzbBG?x4re=uZ$;Py3JZpdaZbz~whoQW$d83FI=3*{p@rFQF`Aru zy$Cxe8qs}K0k=!RPgOKD(SVaUOvFyt9&t~4m1}nN7(m}zV?~^0zR-N+bdwUI$GKc1 z7;ZNY#hkG@fV0HuA-I=>GPFa<*_yg#VwzE+{lh_5(?#20aMKX0kptj$iq0{@T7rv{Wh{c=e0!B0_BO`rw%j zv*4qF$L7({mb5!X2{&7$e5SXjK+EW)*mYfNPq;3k+dAw#Iy8A0Ekvlavvm|dymQ=) z+x^gy2K=84!P^S%$8gtzDp5?&;5-)SH{j}+MxlpIc;Lh4&@dgd6vK^h>p)B9`{J<) z>>eMP#OcQGSZy4CQA?)z%ae+qa1zMyiijHku6X#{(uF@B{D_A7jOrSGc}i>JqwvOn zDhNCA6#EjzwTS)(D-05UNM{(4H!s`9llUc@0+Nge;+YP~L&*s8QQ9w91S6?vT~I#thbg|9CI@I`?<(626Yk6H>Z z4I5Q9J;vJAGz$Xw>P^AmMK zPuOSZbfZQ+k3G2!o)uoq*(IrXm{eXpYxRTBY_1g?V`}SLHXpJWb+cU$S821exaQLt zyQa9yh3@bM^V+!vwneB+w|9A0^_fr9GtWBCt&V!^OQRykn?JJK)+uits2+XjMxqPX z_Bfk9IA#v5PQIG9MaWX<{0NkHa69!Qn~&XX54Uv287|3Fa?)C{g)1uQ>9U2jSGtog zmR4-E)1=@$i9U)>-a@!lcehGBeY|9t8Y@2kquTI>u|cyPuJ90xGZb$^p&N|I-F+U( z$Bke-JW`&#^%xD!J5J`TW-nUgo5cxVE# z@)E~wWnvVW(Kt{ToY|nA&h<6MBujD32OW>#Zg3-wROiTJOtGYr5rlL|F^+14F%mQa zafl&xnR@bxXvM~aWbXm8F^cw~eRLY-m+<6M;kcLv4uNazvNaUFdOUdbi18i{b!$Ld w+ihdraTI*j2zfe|oDVaHmM help\n\n"); + PP "-i : [I]gnore the given taxonomy id.\n\n"); + PP "-l : minimum [L]ength : define the minimum amplication length. \n\n"); + PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n"); + PP "-r : [R]estricts the search to the given taxonomic id.\n\n"); + PP "-c : Consider that the database sequences are [c]ircular\n\n"); + PP "-3 : Three prime strict match\n\n"); + PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n"); + PP "-s : [S]ensitivity quorum\n\n"); + PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n"); + PP "-x : false positive quorum\n\n"); + PP "-D : set in [d]ouble strand mode\n\n"); + PP "-S : Set in [s]ingle strand mode\n\n"); + PP "-U : No multi match\n\n"); + PP "\n"); + PP "------------------------------------------\n"); + PP "Table result description : \n"); + PP "column 1 : serial number\n"); + PP "column 2 : primer1\n"); + PP "column 3 : primer2\n"); + PP "column 4 : good/bad\n"); + PP "column 5 : in sequence count\n"); + PP "column 6 : out sequence count\n"); + PP "column 7 : yule\n"); + PP "column 8 : in taxa count\n"); + PP "column 9 : out taxa count\n"); + PP "column 10 : coverage\n"); + PP "column 11 : specificity\n"); + PP "column 12 : minimum amplified length\n"); + PP "column 13 : maximum amplified length\n"); + PP "column 14 : average amplified length\n"); + PP "------------------------------------------\n"); + PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n"); + PP "------------------------------------------\n\n"); + PP "\n"); + } static void ExitUsage(int stat) @@ -56,7 +106,7 @@ void initoptions(poptions_t options) options->strict_exclude_quorum=0.1; options->sensitivity_quorum=0.9; options->false_positive_quorum=0.1; - options->strict_three_prime=2; + options->strict_three_prime=0; options->r=0; options->g=0; options->no_multi_match=FALSE; @@ -75,7 +125,7 @@ void printcurrenttime () /* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */ ts = localtime(&now); strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts); - fprintf(stderr,"#%d#, %s\n",now, buf); + fprintf(stderr,"#%d#, %s\n",(int)now, buf); } void printcurrenttimeinmilli() @@ -90,7 +140,125 @@ void printcurrenttimeinmilli() } /*TR: Added*/ + +void printapair(int32_t index,ppair_t pair, poptions_t options) +{ + uint32_t wellidentifiedtaxa; + + printf("%6d\t",index); + if (pair->asdirect1) + printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length)); + else + printf("%s\t",ecoUnhashWord(ecoComplementWord(pair->p1->word, + options->primer_length),options->primer_length)); + + if (pair->asdirect2) + printf("%s",ecoUnhashWord(pair->p2->word,options->primer_length)); + else + printf("%s",ecoUnhashWord(ecoComplementWord(pair->p2->word, + options->primer_length),options->primer_length)); + + printf("\t%c%c", "bG"[(int)pair->p1->good],"bG"[(int)pair->p2->good]); + + + printf("\t%d", pair->inexample); + printf("\t%d", pair->outexample); + printf("\t%4.3f", pair->yule); + + printf("\t%d", pair->intaxa); + printf("\t%d", pair->outtaxa); + printf("\t%4.3f", (float)pair->intaxa/options->intaxa); + + wellidentifiedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa; + + //printf("\t%d", pair->notwellidentifiedtaxa); + + //printf("\t%d", (pair->intaxa + pair->outtaxa)); + + + + printf("\t%4.3f", (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa)); + + + printf("\t%d", pair->mind); + printf("\t%d", pair->maxd); + printf("\t%3.2f\n", (float)pair->sumd/pair->inexample); + +} + +uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options) +{ + uint32_t i,j; + float q,qfp; + + for (i=0,j=0;i < count;i++) + { + if (options->insamples) + q = (float)sortedpairs[i]->inexample/options->insamples; + else q=1.0; + + if (options->outsamples) + qfp = (float)sortedpairs[i]->outexample/options->outsamples; + else qfp=0.0; + + sortedpairs[i]->quorumin = q; + sortedpairs[i]->quorumout = qfp; + sortedpairs[i]->yule = q -qfp; + + + sortedpairs[j]=sortedpairs[i]; + + + + if (q > options->sensitivity_quorum && + qfp < options->false_positive_quorum) + { + (void)taxonomycoverage(sortedpairs[j],options); + taxonomyspecificity(sortedpairs[j]); + j++; + } + } + + return j; +} + +void printpairs (ppairtree_t pairs, poptions_t options) +{ + ppair_t* sortedpairs; + ppair_t* index; + ppairlist_t pl; + size_t i,j; + int32_t count; + + //printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n"); + + fprintf(stderr,"Total pair count : %d\n",pairs->count); + + sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs"); + index=sortedpairs; + pl=pairs->first; + j=0; + while(pl->next) + { + for (i=0;ipaircount;i++,j++) + sortedpairs[j]=pl->pairs+i; + pl=pl->next; + } + + for (i=0;ipaircount;i++,j++) + sortedpairs[j]=pl->pairs+i; + + count=filterandsortpairs(sortedpairs,pairs->count,options); + + for (i=0;i < count;i++) + printapair(i,sortedpairs[i],options); + +} + + +#ifdef MASKEDCODE void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize) + { uint32_t i; uint32_t wordsize = options->primer_length; @@ -98,9 +266,9 @@ void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, ui double sens; double speci; float avg; - + quorumseqs = seqdbsize * 70 / 100; - + printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank); for (i=0; i < pairs.paircount; i++) @@ -121,9 +289,12 @@ void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, ui } } +#endif /* MASKEDCODE */ + /*updateseqparams: This function counts the insample and outsample sequences * and with each sequences adds a tag of the taxon to which the sequence beongs*/ -void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, + +void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, poptions_t options, int32_t *insamples, int32_t *outsamples) { uint32_t i; @@ -131,7 +302,7 @@ void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxo ecotx_t *tmptaxon; for (i=0;iisexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options); if (seqdb[i]->isexample) (*insamples)++; @@ -139,7 +310,7 @@ void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxo (*outsamples)++; taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid; - tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid); + tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid); if (tmptaxon) tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx); if (tmptaxon) @@ -154,7 +325,7 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options) /*set taxon rank for which result is to be given*/ for (i = 0; i < taxonomy->ranks->count; i++) { - if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0) + if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0) { options->taxonrankidx = i; break; @@ -168,47 +339,10 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options) } } /* to get db stats, totals of species, genus etc....*/ -int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, - poptions_t options) -{ - uint32_t i; - uint32_t j; - uint32_t nameslots = 500; - uint32_t namesindex = 0; - int32_t *ranktaxonids = ECOMALLOC(nameslots * sizeof(int32_t), "Error in taxon rank allocation"); - int32_t taxid; - - ecotx_t *tmptaxon; - for (i=0;itaxons->taxon[seqdb[i]->taxid].taxid; - tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid); - if (tmptaxon) - tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx); - if (tmptaxon) - { - for (j = 0; j < namesindex; j++) - { - if (tmptaxon->taxid == ranktaxonids[j]) break; - } - if (j < namesindex) continue; /* name is already in list, so no need to add it*/ - - if (namesindex == nameslots) - { - nameslots += 500; - ranktaxonids = ECOREALLOC(ranktaxonids, nameslots * sizeof(int32_t), "Cannot allocate pair rank taxon table"); - } - ranktaxonids[namesindex] = tmptaxon->taxid; - namesindex++; - } - } - ECOFREE(ranktaxonids, "free rank taxon table"); - - return namesindex; -} +#ifdef MASKEDCODE -void setoktaxforspecificity (ppairscount_t pairs) +void setoktaxforspecificity (ppairtree_t pairs) { uint32_t i; uint32_t j; @@ -216,7 +350,7 @@ void setoktaxforspecificity (ppairscount_t pairs) uint32_t l; int taxcount; int32_t taxid; - + for (i = 0; i < pairs->paircount; i++) { for (j = 0; j < pairs->pairs[i].taxsetindex; j++) @@ -235,13 +369,13 @@ void setoktaxforspecificity (ppairscount_t pairs) taxcount += pairs->pairs[i].ampset[l].seqidindex; break; } - + if (taxid != pairs->pairs[i].ampset[l].taxonids[0]) { if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0]; - taxcount++; + taxcount++; } - + if (taxcount > 1) break; } } @@ -251,6 +385,8 @@ void setoktaxforspecificity (ppairscount_t pairs) } } +#endif + int main(int argc, char **argv) { pecodnadb_t seqdb; /* of type ecoseq_t */ @@ -264,11 +400,11 @@ int main(int argc, char **argv) int32_t insamples=0; int32_t outsamples=0; uint32_t i; - - pwordcount_t words; - pprimercount_t primers; - pairscount_t pairs; - + + pwordcount_t words; + pprimercount_t primers; + ppairtree_t pairs; + int32_t rankdbstats = 0; //printcurrenttime(); @@ -290,9 +426,9 @@ int main(int argc, char **argv) /* -------------------- */ case 'h': /* help */ /* -------------------- */ - PrintHelp(); - exit(0); - break; + PrintHelp(); + exit(0); + break; /* ------------------------- */ case 'l': /* min amplification lenght */ @@ -337,7 +473,7 @@ int main(int argc, char **argv) strncpy(options.taxonrank, optarg, 19); options.taxonrank[19] = 0; break; - + /* -------------------- */ case 'x': /* strict matching quorum */ /* -------------------- */ @@ -396,22 +532,27 @@ int main(int argc, char **argv) fprintf(stderr,"Reading taxonomy database ..."); taxonomy = read_taxonomy(options.prefix,0); fprintf(stderr,"Ok\n"); - + setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/ - + fprintf(stderr,"Reading sequence database ...\n"); seqdb = readdnadb(options.prefix,&seqdbsize); fprintf(stderr,"Ok\n"); fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize); - + updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples); + options.dbsize=seqdbsize; + options.insamples=insamples; + options.outsamples=outsamples; rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options); - fprintf(stderr,"Database is constituted of %5d examples\n",insamples); - fprintf(stderr," and %5d counterexamples\n",outsamples); + fprintf(stderr,"Database is constituted of %5d examples corresponding to %5d %s\n",insamples, + options.intaxa,options.taxonrank); + fprintf(stderr," and %5d counterexamples corresponding to %5d %s\n",outsamples, + options.outtaxa,options.taxonrank); fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats); fprintf(stderr,"\nIndexing words in sequences\n"); @@ -419,7 +560,7 @@ int main(int argc, char **argv) printcurrenttimeinmilli(); words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options); printcurrenttimeinmilli(); - + fprintf(stderr,"\n Strict primer count : %d\n",words->size); if (options.no_multi_match) @@ -460,13 +601,15 @@ int main(int argc, char **argv) /*TR: Added*/ pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options); - setoktaxforspecificity (&pairs); - printpairs (pairs, &options, rankdbstats, seqdbsize); - - - - ECOFREE(pairs.pairs,"Free pairs table"); + + // setoktaxforspecificity (&pairs); + + printpairs (pairs, &options); + + + + //ECOFREE(pairs.pairs,"Free pairs table"); return 0; } diff --git a/src/libecoPCR/ecoError.P b/src/libecoPCR/ecoError.P new file mode 100644 index 0000000..7c7ae71 --- /dev/null +++ b/src/libecoPCR/ecoError.P @@ -0,0 +1,15 @@ +ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoIOUtils.P b/src/libecoPCR/ecoIOUtils.P new file mode 100644 index 0000000..fc34a70 --- /dev/null +++ b/src/libecoPCR/ecoIOUtils.P @@ -0,0 +1,15 @@ +ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoMalloc.P b/src/libecoPCR/ecoMalloc.P new file mode 100644 index 0000000..ea3767b --- /dev/null +++ b/src/libecoPCR/ecoMalloc.P @@ -0,0 +1,15 @@ +ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecodna.P b/src/libecoPCR/ecodna.P new file mode 100644 index 0000000..b9a71b9 --- /dev/null +++ b/src/libecoPCR/ecodna.P @@ -0,0 +1,5 @@ +ecodna.o ecodna.P : ecodna.c /usr/include/string.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h ecoPCR.h \ + /usr/include/stdio.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h diff --git a/src/libecoPCR/ecofilter.P b/src/libecoPCR/ecofilter.P new file mode 100644 index 0000000..d46d3e0 --- /dev/null +++ b/src/libecoPCR/ecofilter.P @@ -0,0 +1,5 @@ +ecofilter.o ecofilter.P : ecofilter.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h diff --git a/src/libecoPCR/econame.P b/src/libecoPCR/econame.P new file mode 100644 index 0000000..4c9946c --- /dev/null +++ b/src/libecoPCR/econame.P @@ -0,0 +1,15 @@ +econame.o econame.P : econame.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \ + /usr/include/i386/signal.h /usr/include/i386/_structs.h \ + /usr/include/sys/_structs.h /usr/include/machine/_structs.h \ + /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecorank.P b/src/libecoPCR/ecorank.P new file mode 100644 index 0000000..75e09b9 --- /dev/null +++ b/src/libecoPCR/ecorank.P @@ -0,0 +1,15 @@ +ecorank.o ecorank.P : ecorank.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \ + /usr/include/i386/signal.h /usr/include/i386/_structs.h \ + /usr/include/sys/_structs.h /usr/include/machine/_structs.h \ + /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoseq.P b/src/libecoPCR/ecoseq.P new file mode 100644 index 0000000..6222690 --- /dev/null +++ b/src/libecoPCR/ecoseq.P @@ -0,0 +1,19 @@ +ecoseq.o ecoseq.P : ecoseq.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/string.h /usr/include/zlib.h /usr/include/zconf.h \ + /usr/include/sys/types.h /usr/include/unistd.h \ + /usr/include/sys/unistd.h /usr/include/sys/select.h \ + /usr/include/sys/_select.h diff --git a/src/libecoPCR/ecotax.P b/src/libecoPCR/ecotax.P new file mode 100644 index 0000000..489fc97 --- /dev/null +++ b/src/libecoPCR/ecotax.P @@ -0,0 +1,15 @@ +ecotax.o ecotax.P : ecotax.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \ + /usr/include/i386/signal.h /usr/include/i386/_structs.h \ + /usr/include/sys/_structs.h /usr/include/machine/_structs.h \ + /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoprimer/Makefile b/src/libecoprimer/Makefile index d68202b..4fc412c 100644 --- a/src/libecoprimer/Makefile +++ b/src/libecoprimer/Makefile @@ -10,7 +10,9 @@ SOURCES = goodtaxon.c \ queue.c \ libstki.c \ sortmatch.c \ + pairtree.c \ pairs.c \ + taxstats.c \ apat_search.c SRCS=$(SOURCES) diff --git a/src/libecoprimer/aproxpattern.c b/src/libecoprimer/aproxpattern.c index d924281..2dc13bf 100644 --- a/src/libecoprimer/aproxpattern.c +++ b/src/libecoprimer/aproxpattern.c @@ -61,7 +61,7 @@ void encodeSequence(ecoseq_t *seq) for (i=0;iSQ_length;i++,data++,cseq++) { - *data = encoder[(IS_UPPER(*cseq) ? *cseq - 'A' : 'Z')]; + *data = encoder[(IS_UPPER(*cseq) ? *cseq : 'Z') - 'A']; } } @@ -82,7 +82,7 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3 uint32_t inSequenceQuorum; uint32_t outSequenceQuorum; bool_t conserved = TRUE; - + //poslist_t ttt; diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h index 5d26573..c65a7fd 100644 --- a/src/libecoprimer/ecoprimer.h +++ b/src/libecoprimer/ecoprimer.h @@ -79,28 +79,39 @@ typedef union { uint32_t value; } poslist_t, *ppostlist_t; -typedef struct { - word_t word; - uint32_t *directCount; - ppostlist_t directPos; +/** + * primer_t structure store fuzzy match positions for a primer + * on all sequences + */ - uint32_t *reverseCount; - ppostlist_t reversePos; - bool_t good; - uint32_t inexample; - uint32_t outexample; +typedef struct { + word_t word; //< code for the primer + uint32_t *directCount; //< Occurrence count on direct strand + ppostlist_t directPos; //< list of position list on direct strand + + uint32_t *reverseCount; //< Occurrence count on reverse strand + ppostlist_t reversePos; //< list of position list on reverse strand + + bool_t good; //< primer match more than quorum example and no + // more counterexample quorum. + + uint32_t inexample; //< count of example sequences matching primer + uint32_t outexample; //< count of counterexample sequences matching primer } primer_t, *pprimer_t; +/** + * primercount_t structure store fuzzy match positions for all primers + * on all sequences as a list of primer_t + */ typedef struct { - pprimer_t primers; + pprimer_t primers; uint32_t size; } primercount_t, *pprimercount_t; typedef struct { - word_t word; + pprimer_t primer; uint32_t position; bool_t strand; - bool_t good; /*TR: Added*/ } primermatch_t, *pprimermatch_t; /*TR: Added*/ @@ -109,6 +120,19 @@ typedef struct { uint32_t matchcount; } primermatchcount_t, *pprimermatchcount_t; +typedef struct { + pecoseq_t sequence; + bool_t strand; + const char *amplifia; + int32_t length; +} amplifia_t, *pamplifia_t; + +typedef struct { + pamplifia_t amplifias; + uint32_t ampcount; + uint32_t ampslot; +} amplifiacount_t, *pamplifiacount_t; + typedef struct { char *amplifia; int32_t *taxonids; @@ -124,30 +148,52 @@ typedef struct { } taxampset_t, *ptaxampset_t; typedef struct { - word_t w1; - word_t w2; - uint32_t inexample; /*inexample count*/ - uint32_t outexample; /*outexample count*/ - - uint32_t mind; - uint32_t maxd; - - uint32_t ampsetcount; - uint32_t ampsetindex; - pampseqset_t ampset; - - uint32_t taxsetcount; - uint32_t taxsetindex; - ptaxampset_t taxset; - - uint32_t oktaxoncount; -} pairs_t, *ppairs_t; + pprimer_t p1; + bool_t asdirect1; + pprimer_t p2; + bool_t asdirect2; + + amplifiacount_t pcr; + + uint32_t inexample; //< example sequence count + uint32_t outexample; //< counterexample sequence count + uint32_t intaxa; //< example taxa count + uint32_t outtaxa; //< counterexample taxa count + uint32_t notwellidentifiedtaxa; + + + // these statistics are relative to inexample sequences + + uint32_t mind; //< minimum distance between primers + uint32_t maxd; //< maximum distance between primers + uint32_t sumd; //< distance sum + float yule; + float quorumin; + float quorumout; +// +// uint32_t taxsetcount; +// uint32_t taxsetindex; +// ptaxampset_t taxset; +// +// uint32_t oktaxoncount; + +} pair_t, *ppair_t; /*TR: Added*/ + typedef struct { - ppairs_t pairs; - uint32_t paircount; -}pairscount_t, *ppairscount_t; + size_t paircount; + size_t pairslots; + void* next; + pair_t pairs[1]; +} pairlist_t, *ppairlist_t; + +typedef struct { + ppairlist_t first; + ppairlist_t last; + void *tree; + int32_t count; +} pairtree_t, *ppairtree_t; typedef struct { pword_t words; @@ -168,6 +214,18 @@ typedef struct { uint32_t size; } merge_t, *pmerge_t; +typedef struct { + const char *amplifia; + bool_t strand; + int32_t length; + int32_t taxoncount; + void *taxontree; +}amptotaxon_t, *pamptotaxon_t; + +typedef struct { + int32_t taxid; + void *amptree; +}taxontoamp_t, *ptaxontoamp_t; typedef struct { uint32_t lmin; //**< Amplifia minimal length @@ -189,6 +247,14 @@ typedef struct { bool_t no_multi_match; char taxonrank[20]; //TR to count ranks against a pair int32_t taxonrankidx; //TR to count ranks against a pair + + // Some statistics useful for options filters + + int32_t dbsize; + int32_t insamples; + int32_t outsamples; + int32_t intaxa; + int32_t outtaxa; } options_t, *poptions_t; typedef ecoseq_t **pecodnadb_t; @@ -232,7 +298,21 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3 void sortmatch(pprimermatch_t table,uint32_t N); +ppairtree_t initpairtree(ppairtree_t tree); +ppair_t pairintree (pair_t key,ppairtree_t pairlist); +ppair_t insertpair(pair_t key,ppairtree_t list); + + /*TR: Added*/ -pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options); +ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options); + +int32_t counttaxon(int32_t taxid); +int32_t getrankdbstats(pecodnadb_t seqdb, + uint32_t seqdbsize, + ecotaxonomy_t *taxonomy, + poptions_t options); +float taxonomycoverage(ppair_t pair, poptions_t options); +char ecoComplementChar(char base); +void taxonomyspecificity (ppair_t pair); #endif /* EPSORT_H_ */ diff --git a/src/libecoprimer/hashsequence.c b/src/libecoprimer/hashsequence.c index 8dfe6d4..af89025 100644 --- a/src/libecoprimer/hashsequence.c +++ b/src/libecoprimer/hashsequence.c @@ -201,3 +201,8 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word) return ~0; } +char ecoComplementChar(char base) +{ + return (base < 4)? !base & 3: 4; +} + diff --git a/src/libecoprimer/pairs.c b/src/libecoprimer/pairs.c index a5308a6..88e6c21 100644 --- a/src/libecoprimer/pairs.c +++ b/src/libecoprimer/pairs.c @@ -7,34 +7,40 @@ #include "ecoprimer.h" #include +#include -primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options); +static void buildPrimerPairsForOneSeq(uint32_t seqid, + pecodnadb_t seqdb, + pprimercount_t primers, + ppairtree_t pairs, + poptions_t options); -int32_t pairinlist (ppairs_t pairlist, word_t w1, word_t w2, uint32_t size) -{ - uint32_t i; - - for (i = 0; i < size; i++) - { - if (w1 == pairlist[i].w1 && w2 == pairlist[i].w2) return i; - if (w1 == pairlist[i].w2 && w2 == pairlist[i].w1) return i; - } - return -1; -} -char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid) + + + + +/************************************* + * + * pair collection management + * + *************************************/ + +#ifdef MASKEDCODE + +char *addamplifiasetelem (ppair_t pair, char* amplifia, int32_t taxid) { uint32_t i; uint32_t j; char *ampused = NULL; - + if(pair->ampsetcount == 0) { pair->ampsetcount = 500; pair->ampsetindex = 0; pair->ampset = ECOMALLOC(pair->ampsetcount * sizeof(ampseqset_t),"Cannot allocate amplifia set"); } - + for (i = 0; i < pair->ampsetindex; i++) { if (strcmp (pair->ampset[i].amplifia, amplifia) == 0) @@ -43,43 +49,43 @@ char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid) break; } } - + if (i == 0) { pair->ampset[i].seqidcount = 100; pair->ampset[i].seqidindex = 0; pair->ampset[i].taxonids = ECOMALLOC(pair->ampset[i].seqidcount * sizeof(uint32_t),"Cannot allocate amplifia sequence table"); } - + if (pair->ampsetindex == pair->ampsetcount) { pair->ampsetcount += 500; pair->ampset = ECOREALLOC(pair->ampset, pair->ampsetcount * sizeof(ampseqset_t), "Cannot allocate amplifia set"); } - + if (pair->ampset[i].seqidindex == pair->ampset[i].seqidcount) { pair->ampset[i].seqidcount += 100; pair->ampset[i].taxonids = ECOREALLOC(pair->ampset[i].taxonids, pair->ampset[i].seqidcount * sizeof(int32_t), "Cannot allocate amplifia sequence table"); } - + if (pair->ampset[i].amplifia == NULL) { pair->ampset[i].amplifia = amplifia; pair->ampsetindex++; } - + for (j = 0; j < pair->ampset[i].seqidindex; j++) { if (pair->ampset[i].taxonids[j] == taxid) break; } - + if (j == pair->ampset[i].seqidindex) pair->ampset[i].taxonids[pair->ampset[i].seqidindex++] = taxid; return ampused; } -void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia) +void addtaxampsetelem (ppair_t pair, int32_t taxid, char *amplifia) { uint32_t i; uint32_t j; @@ -90,42 +96,42 @@ void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia) pair->taxsetindex = 0; pair->taxset = ECOMALLOC(pair->taxsetcount * sizeof(taxampset_t),"Cannot allocate taxon set"); } - + for (i = 0; i < pair->taxsetindex; i++) { if (pair->taxset[i].taxonid == taxid) break; } - + if (i == 0) { pair->taxset[i].amplifiacount = 100; pair->taxset[i].amplifiaindex = 0; pair->taxset[i].amplifia = ECOMALLOC(pair->taxset[i].amplifiacount * sizeof(char *),"Cannot allocate amplifia table"); } - + if (pair->taxsetindex == pair->taxsetcount) { pair->taxsetcount += 500; pair->taxset = ECOREALLOC(pair->taxset, pair->taxsetcount * sizeof(taxampset_t), "Cannot allocate taxon set"); } - + if (pair->taxset[i].amplifiaindex == pair->taxset[i].amplifiacount) { pair->taxset[i].amplifiacount += 100; pair->taxset[i].amplifia = ECOREALLOC(pair->taxset[i].amplifia, pair->taxset[i].amplifiacount * sizeof(char *), "Cannot allocate amplifia table"); } - + if (pair->taxset[i].taxonid == 0) { pair->taxset[i].taxonid = taxid; pair->taxsetindex++; } - + for (j = 0; j < pair->taxset[i].amplifiaindex; j++) { if (strcmp(pair->taxset[i].amplifia[j], amplifia) == 0) break; } - + if (j == pair->taxset[i].amplifiaindex) { pair->taxset[i].amplifia[j] = amplifia; @@ -135,140 +141,62 @@ void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia) char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len) { + fprintf(stderr,"start : %d length : %d\n",start,len); char *amplifia = ECOMALLOC((len + 1) * sizeof(char),"Cannot allocate amplifia"); char *seqc = &seq->SQ[start]; - + strncpy(amplifia, seqc, len); return amplifia; } +#endif /*TR: Added*/ -pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options) +ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options) { uint32_t i; - uint32_t j; - uint32_t k; - uint32_t d; - uint32_t strt; - uint32_t end; - uint32_t paircount = 0; - uint32_t pairslots = 500; - int32_t foundindex; - ppairs_t pairs; - pairscount_t primerpairs; - primermatchcount_t seqmatchcount; - word_t w1; - word_t w2; - char *amplifia; - char *oldamp; - + ppairtree_t primerpairs; - pairs = ECOMALLOC(pairslots * sizeof(pairs_t),"Cannot allocate pairs table"); + primerpairs = initpairtree(NULL); for (i=0; i < seqdbsize; i++) { - seqmatchcount = buildPrimerPairsForOneSeq(i, primers, options); - if (seqmatchcount.matchcount == 0) continue; - - for (j=0; j < seqmatchcount.matchcount; j++) - { - strt = 0; - w1 = seqmatchcount.matches[j].word; - /*first word should b on direct strand*/ - if (!seqmatchcount.matches[j].strand) - w1 = ecoComplementWord(w1, options->primer_length); - else - strt = options->primer_length; - - for (k=j+1; k < seqmatchcount.matchcount; k++) - { - end = 0; - w2 = seqmatchcount.matches[k].word; - /*second word should be on reverse strand*/ - if (seqmatchcount.matches[k].strand) - w2 = ecoComplementWord(w2, options->primer_length); - else - end = options->primer_length; - - if (!(seqmatchcount.matches[j].good || seqmatchcount.matches[k].good)) continue; - if (w1 == w2) continue; - - d = seqmatchcount.matches[k].position - seqmatchcount.matches[j].position; - if (d >= options->lmin && d <= options->lmax) - { - /*get amplified string*/ - amplifia = getamplifia (seqdb[i], seqmatchcount.matches[j].position + strt, d - strt - end); - - foundindex = pairinlist(pairs, w1, w2, paircount); - if (foundindex != -1) /*pair is found*/ - { - if (seqdb[i]->isexample) - pairs[foundindex].inexample++; - else - pairs[foundindex].outexample++; - - if (pairs[foundindex].mind > d) pairs[foundindex].mind = d; - else if (pairs[foundindex].maxd < d) pairs[foundindex].maxd = d; - - oldamp = addamplifiasetelem (&pairs[foundindex], amplifia, seqdb[i]->ranktaxonid); - /*if exact same string is already in amplifia set then use that for taxon set, it will help for - * calculating the fully identified taxons i.e specificity, we will compare pointrs instead of strings - * because same string means same pointer*/ - if (oldamp) - { - ECOFREE (amplifia, "free amplifia"); - amplifia = oldamp; - } - addtaxampsetelem (&pairs[foundindex], seqdb[i]->ranktaxonid, amplifia); - - continue; - } - - if (paircount == pairslots) - { - pairslots += 500; - pairs = ECOREALLOC(pairs, pairslots * sizeof(pairs_t), "Cannot allocate pairs table"); - } - pairs[paircount].w1 = w1; - pairs[paircount].w2 = w2; - if (seqdb[i]->isexample) pairs[paircount].inexample = 1; - else pairs[paircount].outexample = 1; - pairs[paircount].mind = d; - pairs[paircount].maxd = d; - oldamp = addamplifiasetelem (&pairs[paircount], amplifia, seqdb[i]->ranktaxonid); - addtaxampsetelem (&pairs[paircount], seqdb[i]->ranktaxonid, amplifia); - - paircount++; - } - else if (d > options->lmax) - break; /*once if the distance is greater than lmax then it will keep on increasing*/ - } - } - ECOFREE(seqmatchcount.matches, "Cannot free matches table"); + buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options); } - primerpairs.pairs = ECOREALLOC(pairs, paircount * sizeof(pairs_t), "Cannot allocate pairs table"); - primerpairs.paircount = paircount; + return primerpairs; + } -primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options) -{ - uint32_t i,j,k; - uint32_t matchcount=0; - pprimermatch_t matches = NULL; - primermatchcount_t seqmatchcount; +#define DMAX (2000000000) + +static void buildPrimerPairsForOneSeq(uint32_t seqid, + pecodnadb_t seqdb, + pprimercount_t primers, + ppairtree_t pairs, + poptions_t options) +{ + static uint32_t paircount=0; + uint32_t i,j,k; + uint32_t matchcount=0; + pprimermatch_t matches = NULL; + primermatchcount_t seqmatchcount; + ppair_t pcurrent; + pair_t current; + pprimer_t wswp; + bool_t bswp; + size_t distance; + bool_t strand; - seqmatchcount.matchcount = 0; - seqmatchcount.matches = NULL; - for (i=0;i < primers->size; i++) { matchcount+=primers->primers[i].directCount[seqid]; matchcount+=primers->primers[i].reverseCount[seqid]; } - - if (matchcount <= 0) return seqmatchcount; + + if (matchcount <= 0) + return; + matches = ECOMALLOC(matchcount * sizeof(primermatch_t),"Cannot allocate primers match table"); for (i=0,j=0;i < primers->size; i++) @@ -277,17 +205,15 @@ primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t prime { if (primers->primers[i].directCount[seqid]==1) { - matches[j].word = primers->primers[i].word; + matches[j].primer = primers->primers+i; matches[j].strand=TRUE; - matches[j].good=primers->primers[i].good;/*TR: Added*/ matches[j].position=primers->primers[i].directPos[seqid].value; j++; } else for (k=0; k < primers->primers[i].directCount[seqid]; k++,j++) { - matches[j].word = primers->primers[i].word; + matches[j].primer = primers->primers+i; matches[j].strand=TRUE; - matches[j].good=primers->primers[i].good;/*TR: Added*/ matches[j].position=primers->primers[i].directPos[seqid].pointer[k]; } } @@ -296,26 +222,144 @@ primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t prime { if (primers->primers[i].reverseCount[seqid]==1) { - matches[j].word = primers->primers[i].word; + matches[j].primer = primers->primers+i; matches[j].strand=FALSE; - matches[j].good=primers->primers[i].good;/*TR: Added*/ matches[j].position=primers->primers[i].reversePos[seqid].value; j++; } else for (k=0; k < primers->primers[i].reverseCount[seqid]; k++,j++) { - matches[j].word = primers->primers[i].word; + matches[j].primer = primers->primers+i; matches[j].strand=FALSE; - matches[j].good=primers->primers[i].good;/*TR: Added*/ matches[j].position=primers->primers[i].reversePos[seqid].pointer[k]; } } } - sortmatch(matches,matchcount); // sort in asscending order by position - - /*TR: Added*/ - seqmatchcount.matches = matches; - seqmatchcount.matchcount = matchcount; - return seqmatchcount; + if (matchcount>1) + { +// fprintf(stderr,"\n====================================\n"); + + sortmatch(matches,matchcount); // sort in ascending order by position + + for (i=0; i < matchcount;i++) + { + // For all primers matching the sequence + + for(j=i+1; + (jprimer_length) < options->lmax); + j++ + ) + + // For all not too far primers + + if ( (matches[i].primer->good || matches[j].primer->good) + && (distance > options->lmin) + ) + { + + // If possible primer pair + + current.p1 = matches[i].primer; + current.asdirect1=matches[i].strand; + current.p2 = matches[j].primer; + current.asdirect2= !matches[j].strand; + current.maxd=DMAX; + current.mind=DMAX; + current.sumd=0; + current.inexample=0; + current.outexample=0; + + + // Standardize the pair + + strand = current.p2->word > current.p1->word; + if (!strand) + { + wswp = current.p1; + current.p1=current.p2; + current.p2=wswp; + + bswp = current.asdirect1; + current.asdirect1=current.asdirect2; + current.asdirect2=bswp; + } + + + // Look for the new pair in already seen pairs + + pcurrent = insertpair(current,pairs); + + + if (seqdb[seqid]->isexample) + + { + pcurrent->inexample++; + pcurrent->sumd+=distance; + + if ((pcurrent->maxd==DMAX) || (distance > pcurrent->maxd)) + pcurrent->maxd = distance; + + if (distance < pcurrent->mind) + pcurrent->mind = distance; + } + else + pcurrent->outexample++; + + if ((pcurrent->outexample+pcurrent->inexample)==1) + { + paircount++; + pcurrent->pcr.ampslot=200; + pcurrent->pcr.ampcount=0; + pcurrent->pcr.amplifias = ECOMALLOC(sizeof(amplifia_t)*pcurrent->pcr.ampslot, + "Cannot allocate amplifia table"); + } + else + { + if (pcurrent->pcr.ampslot==pcurrent->pcr.ampcount) + { + pcurrent->pcr.ampslot+=200; + pcurrent->pcr.amplifias = ECOREALLOC(pcurrent->pcr.amplifias, + sizeof(amplifia_t)*pcurrent->pcr.ampslot, + "Cannot allocate amplifia table"); + } + } + + pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].length=distance; + pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].sequence=seqdb[seqid]; + pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].strand=strand; + + if (strand) + pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[i].position + options->primer_length; + else + pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ; + + pcurrent->pcr.ampcount++; +// fprintf(stderr,"%c%c W1 : %s direct : %c", +// "bG"[(int)pcurrent->p1->good], +// "bG"[(int)pcurrent->p2->good], +// ecoUnhashWord(pcurrent->p1->word, options->primer_length), +// "><"[(int)pcurrent->asdirect1] +// ); +// +// fprintf(stderr," W2 : %s direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n", +// ecoUnhashWord(pcurrent->p2->word, options->primer_length), +// "><"[(int)pcurrent->asdirect2], +// distance, +// pcurrent->mind,pcurrent->maxd, +// (pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0, +// pcurrent->inexample,pcurrent->outexample, +// " N"[(pcurrent->outexample+pcurrent->inexample)==1], +// paircount +// +// ); +// + + } + } + } + + pairs->count=paircount; + } diff --git a/tools/ecoPCRFormat.py b/tools/ecoPCRFormat.py new file mode 100755 index 0000000..3884001 --- /dev/null +++ b/tools/ecoPCRFormat.py @@ -0,0 +1,651 @@ +#!/usr/bin/env python + +import re +import gzip +import struct +import sys +import time +import getopt + +try: + import psycopg2 + _dbenable=True +except ImportError: + _dbenable=False + +##### +# +# +# Generic file function +# +# +##### + +def universalOpen(file): + if isinstance(file,str): + if file[-3:] == '.gz': + rep = gzip.open(file) + else: + rep = open(file) + else: + rep = file + return rep + +def universalTell(file): + if isinstance(file, gzip.GzipFile): + file=file.myfileobj + return file.tell() + +def fileSize(file): + if isinstance(file, gzip.GzipFile): + file=file.myfileobj + pos = file.tell() + file.seek(0,2) + length = file.tell() + file.seek(pos,0) + return length + +def progressBar(pos,max,reset=False,delta=[]): + if reset: + del delta[:] + if not delta: + delta.append(time.time()) + delta.append(time.time()) + + delta[1]=time.time() + elapsed = delta[1]-delta[0] + percent = float(pos)/max * 100 + remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent))) + bar = '#' * int(percent/2) + bar+= '|/-\\-'[pos % 5] + bar+= ' ' * (50 - int(percent/2)) + sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain)) + +##### +# +# +# NCBI Dump Taxonomy reader +# +# +##### + +def endLessIterator(endedlist): + for x in endedlist: + yield x + while(1): + yield endedlist[-1] + +class ColumnFile(object): + + def __init__(self,stream,sep=None,strip=True,types=None): + if isinstance(stream,str): + self._stream = open(stream) + elif hasattr(stream,'next'): + self._stream = stream + else: + raise ValueError,'stream must be string or an iterator' + self._delimiter=sep + self._strip=strip + if types: + self._types=[x for x in types] + for i in xrange(len(self._types)): + if self._types[i] is bool: + self._types[i]=ColumnFile.str2bool + else: + self._types=None + + def str2bool(x): + return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False})) + + str2bool = staticmethod(str2bool) + + + def __iter__(self): + return self + + def next(self): + ligne = self._stream.next() + data = ligne.split(self._delimiter) + if self._strip or self._types: + data = [x.strip() for x in data] + if self._types: + it = endLessIterator(self._types) + data = [x[1](x[0]) for x in ((y,it.next()) for y in data)] + return data + +def taxonCmp(t1,t2): + if t1[0] < t2[0]: + return -1 + elif t1[0] > t2[0]: + return +1 + return 0 + +def bsearchTaxon(taxonomy,taxid): + taxCount = len(taxonomy) + begin = 0 + end = taxCount + oldcheck=taxCount + check = begin + end / 2 + while check != oldcheck and taxonomy[check][0]!=taxid : + if taxonomy[check][0] < taxid: + begin=check + else: + end=check + oldcheck=check + check = (begin + end) / 2 + + + if taxonomy[check][0]==taxid: + return check + else: + return None + + + +def readNodeTable(file): + + file = universalOpen(file) + + nodes = ColumnFile(file, + sep='|', + types=(int,int,str, + str,str,bool, + int,bool,int, + bool,bool,bool,str)) + print >>sys.stderr,"Reading taxonomy dump file..." + taxonomy=[[n[0],n[2],n[1]] for n in nodes] + print >>sys.stderr,"List all taxonomy rank..." + ranks =list(set(x[1] for x in taxonomy)) + ranks.sort() + ranks = dict(map(None,ranks,xrange(len(ranks)))) + + print >>sys.stderr,"Sorting taxons..." + taxonomy.sort(taxonCmp) + + print >>sys.stderr,"Indexing taxonomy..." + index = {} + for t in taxonomy: + index[t[0]]=bsearchTaxon(taxonomy, t[0]) + + print >>sys.stderr,"Indexing parent and rank..." + for t in taxonomy: + t[1]=ranks[t[1]] + t[2]=index[t[2]] + + + return taxonomy,ranks,index + +def nameIterator(file): + file = universalOpen(file) + names = ColumnFile(file, + sep='|', + types=(int,str, + str,str)) + for taxid,name,unique,classname,white in names: + yield taxid,name,classname + +def mergedNodeIterator(file): + file = universalOpen(file) + merged = ColumnFile(file, + sep='|', + types=(int,int,str)) + for taxid,current,white in merged: + yield taxid,current + +def deletedNodeIterator(file): + file = universalOpen(file) + deleted = ColumnFile(file, + sep='|', + types=(int,str)) + for taxid,white in deleted: + yield taxid + +def readTaxonomyDump(taxdir): + taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir) + + print >>sys.stderr,"Adding scientific name..." + + alternativeName=[] + for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir): + alternativeName.append((name,classname,index[taxid])) + if classname == 'scientific name': + taxonomy[index[taxid]].append(name) + + print >>sys.stderr,"Adding taxid alias..." + for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir): + index[taxid]=index[current] + + print >>sys.stderr,"Adding deleted taxid..." + for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir): + index[taxid]=None + + return taxonomy,ranks,alternativeName,index + +def readTaxonomyDB(dbname): + connection = psycopg2.connect(database=dbname) + + cursor = connection.cursor() + cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon") + taxonomy=[list(x) for x in cursor] + + cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class") + ranks=cursor.fetchall() + ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks)))) + + print >>sys.stderr,"Sorting taxons..." + taxonomy.sort(taxonCmp) + + print >>sys.stderr,"Indexing taxonomy..." + index = {} + for t in taxonomy: + index[t[0]]=bsearchTaxon(taxonomy, t[0]) + + print >>sys.stderr,"Indexing parent and rank..." + for t in taxonomy: + t[1]=ranks[t[1]] + try: + t[2]=index[t[2]] + except KeyError,e: + if t[2] is None and t[0]==1: + t[2]=index[t[0]] + else: + raise e + + cursor.execute("select taxid,name,category from ncbi_taxonomy.name") + + alternativeName=[] + for taxid,name,classname in cursor: + alternativeName.append((name,classname,index[taxid])) + if classname == 'scientific name': + taxonomy[index[taxid]].append(name) + + cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias") + + print >>sys.stderr,"Adding taxid alias..." + for taxid,current in cursor: + if current is not None: + index[taxid]=index[current] + else: + index[taxid]=None + + + return taxonomy,ranks,alternativeName,index + +##### +# +# +# Genbank/EMBL sequence reader +# +# +##### + +def entryIterator(file): + file = universalOpen(file) + rep =[] + for ligne in file: + rep.append(ligne) + if ligne == '//\n': + rep = ''.join(rep) + yield rep + rep = [] + +def fastaEntryIterator(file): + file = universalOpen(file) + rep =[] + for ligne in file: + if ligne[0] == '>' and rep: + rep = ''.join(rep) + yield rep + rep = [] + rep.append(ligne) + if rep: + rep = ''.join(rep) + yield rep + +_cleanSeq = re.compile('[ \n0-9]+') + +def cleanSeq(seq): + return _cleanSeq.sub('',seq) + + +_gbParseID = re.compile('(?<=^LOCUS {7})[^ ]+(?= )',re.MULTILINE) +_gbParseDE = re.compile('(?<=^DEFINITION {2}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL) +_gbParseSQ = re.compile('(?<=^ORIGIN).+?(?=^//$)',re.MULTILINE+re.DOTALL) +_gbParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")') + +def genbankEntryParser(entry): + Id = _gbParseID.findall(entry)[0] + De = ' '.join(_gbParseDE.findall(entry)[0].split()) + Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper()) + try: + Tx = int(_gbParseTX.findall(entry)[0]) + except IndexError: + Tx = None + return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq} + +###################### + +_cleanDef = re.compile('[\nDE]') + +def cleanDef(definition): + return _cleanDef.sub('',definition) + +_emblParseID = re.compile('(?<=^ID {3})[^ ]+(?=;)',re.MULTILINE) +_emblParseDE = re.compile('(?<=^DE {3}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL) +_emblParseSQ = re.compile('(?<=^ ).+?(?=^//$)',re.MULTILINE+re.DOTALL) +_emblParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")') + +def emblEntryParser(entry): + Id = _emblParseID.findall(entry)[0] + De = ' '.join(cleanDef(_emblParseDE.findall(entry)[0]).split()) + Sq = cleanSeq(_emblParseSQ.findall(entry)[0].upper()) + try: + Tx = int(_emblParseTX.findall(entry)[0]) + except IndexError: + Tx = None + return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq} + + +###################### + +_fastaSplit=re.compile(';\W*') + +def parseFasta(seq): + seq=seq.split('\n') + title = seq[0].strip()[1:].split(None,1) + id=title[0] + if len(title) == 2: + field = _fastaSplit.split(title[1]) + else: + field=[] + info = dict(x.split('=',1) for x in field if '=' in x) + definition = ' '.join([x for x in field if '=' not in x]) + seq=(''.join([x.strip() for x in seq[1:]])).upper() + return id,seq,definition,info + + +def fastaEntryParser(entry): + id,seq,definition,info = parseFasta(entry) + Tx = info.get('taxid',None) + if Tx is not None: + Tx=int(Tx) + return {'id':id,'taxid':Tx,'definition':definition,'sequence':seq} + + +def sequenceIteratorFactory(entryParser,entryIterator): + def sequenceIterator(file): + for entry in entryIterator(file): + yield entryParser(entry) + return sequenceIterator + + +def taxonomyInfo(entry,connection): + taxid = entry['taxid'] + curseur = connection.cursor() + curseur.execute(""" + select taxid,species,genus,family, + taxonomy.scientificName(taxid) as sn, + taxonomy.scientificName(species) as species_sn, + taxonomy.scientificName(genus) as genus_sn, + taxonomy.scientificName(family) as family_sn + from + ( + select alias as taxid, + taxonomy.getSpecies(alias) as species, + taxonomy.getGenus(alias) as genus, + taxonomy.getFamily(alias) as family + from taxonomy.aliases + where id=%d ) as tax + """ % taxid) + rep = curseur.fetchone() + entry['current_taxid']=rep[0] + entry['species']=rep[1] + entry['genus']=rep[2] + entry['family']=rep[3] + entry['scientific_name']=rep[4] + entry['species_sn']=rep[5] + entry['genus_sn']=rep[6] + entry['family_sn']=rep[7] + return entry + +##### +# +# +# Binary writer +# +# +##### + +def ecoSeqPacker(sq): + + compactseq = gzip.zlib.compress(sq['sequence'],9) + cptseqlength = len(compactseq) + delength = len(sq['definition']) + + totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength + + packed = struct.pack('> I I 20s I I I %ds %ds' % (delength,cptseqlength), + totalSize, + sq['taxid'], + sq['id'], + delength, + len(sq['sequence']), + cptseqlength, + sq['definition'], + compactseq) + + assert len(packed) == totalSize+4, "error in sequence packing" + + return packed + +def ecoTaxPacker(tx): + + namelength = len(tx[3]) + + totalSize = 4 + 4 + 4 + 4 + namelength + + packed = struct.pack('> I I I I I %ds' % namelength, + totalSize, + tx[0], + tx[1], + tx[2], + namelength, + tx[3]) + + return packed + +def ecoRankPacker(rank): + + namelength = len(rank) + + packed = struct.pack('> I %ds' % namelength, + namelength, + rank) + + return packed + +def ecoNamePacker(name): + + namelength = len(name[0]) + classlength= len(name[1]) + totalSize = namelength + classlength + 4 + 4 + 4 + 4 + + packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength), + totalSize, + int(name[1]=='scientific name'), + namelength, + classlength, + name[2], + name[0], + name[1]) + + return packed + +def ecoSeqWriter(file,input,taxindex,parser): + output = open(file,'wb') + input = universalOpen(input) + inputsize = fileSize(input) + entries = parser(input) + seqcount=0 + skipped = [] + + output.write(struct.pack('> I',seqcount)) + + progressBar(1, inputsize,reset=True) + for entry in entries: + if entry['taxid'] is not None: + try: + entry['taxid']=taxindex[entry['taxid']] + except KeyError: + entry['taxid']=None + if entry['taxid'] is not None: + seqcount+=1 + output.write(ecoSeqPacker(entry)) + else: + skipped.append(entry['id']) + where = universalTell(input) + progressBar(where, inputsize) + print >>sys.stderr," Readed sequences : %d " % seqcount, + else: + skipped.append(entry['id']) + + print >>sys.stderr + output.seek(0,0) + output.write(struct.pack('> I',seqcount)) + + output.close() + return skipped + + +def ecoTaxWriter(file,taxonomy): + output = open(file,'wb') + output.write(struct.pack('> I',len(taxonomy))) + + for tx in taxonomy: + output.write(ecoTaxPacker(tx)) + + output.close() + +def ecoRankWriter(file,ranks): + output = open(file,'wb') + output.write(struct.pack('> I',len(ranks))) + + rankNames = ranks.keys() + rankNames.sort() + + for rank in rankNames: + output.write(ecoRankPacker(rank)) + + output.close() + +def nameCmp(n1,n2): + name1=n1[0].upper() + name2=n2[0].upper() + if name1 < name2: + return -1 + elif name1 > name2: + return 1 + return 0 + + +def ecoNameWriter(file,names): + output = open(file,'wb') + output.write(struct.pack('> I',len(names))) + + names.sort(nameCmp) + + for name in names: + output.write(ecoNamePacker(name)) + + output.close() + +def ecoDBWriter(prefix,taxonomy,seqFileNames,parser): + + ecoRankWriter('%s.rdx' % prefix, taxonomy[1]) + ecoTaxWriter('%s.tdx' % prefix, taxonomy[0]) + ecoNameWriter('%s.ndx' % prefix, taxonomy[2]) + + filecount = 0 + for filename in seqFileNames: + filecount+=1 + sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount), + filename, + taxonomy[3], + parser) + if sk: + print >>sys.stderr,"Skipped entry :" + print >>sys.stderr,sk + +def ecoParseOptions(arguments): + opt = { + 'prefix' : 'ecodb', + 'taxdir' : 'taxdump', + 'parser' : sequenceIteratorFactory(genbankEntryParser, + entryIterator) + } + + o,filenames = getopt.getopt(arguments, + 'ht:T:n:gfe', + ['help', + 'taxonomy=', + 'taxonomy_db=', + 'name=', + 'genbank', + 'fasta', + 'embl']) + + for name,value in o: + if name in ('-h','--help'): + printHelp() + exit() + elif name in ('-t','--taxonomy'): + opt['taxmod']='dump' + opt['taxdir']=value + elif name in ('-T','--taxonomy_db'): + opt['taxmod']='db' + opt['taxdb']=value + elif name in ('-n','--name'): + opt['prefix']=value + elif name in ('-g','--genbank'): + opt['parser']=sequenceIteratorFactory(genbankEntryParser, + entryIterator) + + elif name in ('-f','--fasta'): + opt['parser']=sequenceIteratorFactory(fastaEntryParser, + fastaEntryIterator) + + elif name in ('-e','--embl'): + opt['parser']=sequenceIteratorFactory(emblEntryParser, + entryIterator) + else: + raise ValueError,'Unknown option %s' % name + + return opt,filenames + +def printHelp(): + print "-----------------------------------" + print " ecoPCRFormat.py" + print "-----------------------------------" + print "ecoPCRFormat.py [option] " + print "-----------------------------------" + print "-e --embl :[E]mbl format" + print "-f --fasta :[F]asta format" + print "-g --genbank :[G]enbank format" + print "-h --help :[H]elp - print this help" + print "-n --name :[N]ame of the new database created" + print "-t --taxonomy :[T]axonomy - path to the taxonomy database" + print " :bcp-like dump from GenBank taxonomy database." + print "-----------------------------------" + +if __name__ == '__main__': + + opt,filenames = ecoParseOptions(sys.argv[1:]) + + if opt['taxmod']=='dump': + taxonomy = readTaxonomyDump(opt['taxdir']) + elif opt['taxmod']=='db': + taxonomy = readTaxonomyDB(opt['taxdb']) + + + ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser']) +