From 64838c1cdfd3daa5dc5464e90391863b8f59944b Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 4 Mar 2009 22:32:55 +0000 Subject: [PATCH] New version based on sort them merge algorithm git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@180 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- src/Makefile | 58 ++++ src/ecoPrimer | Bin 0 -> 62448 bytes src/ecoprimer.c | 472 +++++++++++++++++++++++++++++++ src/global.mk | 20 ++ src/libecoPCR/Makefile | 30 ++ src/libecoPCR/ecoError.P | 15 + src/libecoPCR/ecoError.c | 26 ++ src/libecoPCR/ecoIOUtils.P | 15 + src/libecoPCR/ecoIOUtils.c | 122 ++++++++ src/libecoPCR/ecoMalloc.P | 15 + src/libecoPCR/ecoMalloc.c | 94 ++++++ src/libecoPCR/ecoPCR.h | 270 ++++++++++++++++++ src/libecoPCR/ecoapat.c | 202 +++++++++++++ src/libecoPCR/ecodna.P | 5 + src/libecoPCR/ecodna.c | 153 ++++++++++ src/libecoPCR/ecofilter.P | 5 + src/libecoPCR/ecofilter.c | 19 ++ src/libecoPCR/econame.P | 15 + src/libecoPCR/econame.c | 61 ++++ src/libecoPCR/ecorank.P | 15 + src/libecoPCR/ecorank.c | 52 ++++ src/libecoPCR/ecoseq.P | 19 ++ src/libecoPCR/ecoseq.c | 227 +++++++++++++++ src/libecoPCR/ecotax.P | 15 + src/libecoPCR/ecotax.c | 329 +++++++++++++++++++++ src/libecoprimer/Makefile | 34 +++ src/libecoprimer/apat.h | 120 ++++++++ src/libecoprimer/apat_parse.c | 65 +++++ src/libecoprimer/apat_search.P | 17 ++ src/libecoprimer/apat_search.c | 156 ++++++++++ src/libecoprimer/aproxpattern.P | 17 ++ src/libecoprimer/aproxpattern.c | 236 ++++++++++++++++ src/libecoprimer/debug.h | 29 ++ src/libecoprimer/ecoprimer.h | 238 ++++++++++++++++ src/libecoprimer/ecotype.h | 14 + src/libecoprimer/goodtaxon.P | 17 ++ src/libecoprimer/goodtaxon.c | 27 ++ src/libecoprimer/hashsequence.P | 17 ++ src/libecoprimer/hashsequence.c | 203 +++++++++++++ src/libecoprimer/libstki.P | 17 ++ src/libecoprimer/libstki.c | 379 +++++++++++++++++++++++++ src/libecoprimer/libstki.h | 89 ++++++ src/libecoprimer/mapping.c | 7 + src/libecoprimer/merge.P | 17 ++ src/libecoprimer/merge.c | 144 ++++++++++ src/libecoprimer/pairs.P | 17 ++ src/libecoprimer/pairs.c | 321 +++++++++++++++++++++ src/libecoprimer/queue.P | 17 ++ src/libecoprimer/queue.c | 100 +++++++ src/libecoprimer/readdnadb.P | 17 ++ src/libecoprimer/readdnadb.c | 35 +++ src/libecoprimer/smothsort.P | 10 + src/libecoprimer/smothsort.c | 265 +++++++++++++++++ src/libecoprimer/sortmatch.P | 17 ++ src/libecoprimer/sortmatch.c | 51 ++++ src/libecoprimer/sortword.P | 17 ++ src/libecoprimer/sortword.c | 44 +++ src/libecoprimer/strictprimers.P | 18 ++ src/libecoprimer/strictprimers.c | 170 +++++++++++ 59 files changed, 5196 insertions(+) create mode 100644 src/Makefile create mode 100755 src/ecoPrimer create mode 100644 src/ecoprimer.c create mode 100644 src/global.mk create mode 100644 src/libecoPCR/Makefile create mode 100644 src/libecoPCR/ecoError.P create mode 100644 src/libecoPCR/ecoError.c create mode 100644 src/libecoPCR/ecoIOUtils.P create mode 100644 src/libecoPCR/ecoIOUtils.c create mode 100644 src/libecoPCR/ecoMalloc.P create mode 100644 src/libecoPCR/ecoMalloc.c create mode 100644 src/libecoPCR/ecoPCR.h create mode 100644 src/libecoPCR/ecoapat.c create mode 100644 src/libecoPCR/ecodna.P create mode 100644 src/libecoPCR/ecodna.c create mode 100644 src/libecoPCR/ecofilter.P create mode 100644 src/libecoPCR/ecofilter.c create mode 100644 src/libecoPCR/econame.P create mode 100644 src/libecoPCR/econame.c create mode 100644 src/libecoPCR/ecorank.P create mode 100644 src/libecoPCR/ecorank.c create mode 100644 src/libecoPCR/ecoseq.P create mode 100644 src/libecoPCR/ecoseq.c create mode 100644 src/libecoPCR/ecotax.P create mode 100644 src/libecoPCR/ecotax.c create mode 100644 src/libecoprimer/Makefile create mode 100644 src/libecoprimer/apat.h create mode 100644 src/libecoprimer/apat_parse.c create mode 100644 src/libecoprimer/apat_search.P create mode 100644 src/libecoprimer/apat_search.c create mode 100644 src/libecoprimer/aproxpattern.P create mode 100644 src/libecoprimer/aproxpattern.c create mode 100644 src/libecoprimer/debug.h create mode 100644 src/libecoprimer/ecoprimer.h create mode 100644 src/libecoprimer/ecotype.h create mode 100644 src/libecoprimer/goodtaxon.P create mode 100644 src/libecoprimer/goodtaxon.c create mode 100644 src/libecoprimer/hashsequence.P create mode 100644 src/libecoprimer/hashsequence.c create mode 100644 src/libecoprimer/libstki.P create mode 100644 src/libecoprimer/libstki.c create mode 100644 src/libecoprimer/libstki.h create mode 100644 src/libecoprimer/mapping.c create mode 100644 src/libecoprimer/merge.P create mode 100644 src/libecoprimer/merge.c create mode 100644 src/libecoprimer/pairs.P create mode 100644 src/libecoprimer/pairs.c create mode 100644 src/libecoprimer/queue.P create mode 100644 src/libecoprimer/queue.c create mode 100644 src/libecoprimer/readdnadb.P create mode 100644 src/libecoprimer/readdnadb.c create mode 100644 src/libecoprimer/smothsort.P create mode 100644 src/libecoprimer/smothsort.c create mode 100644 src/libecoprimer/sortmatch.P create mode 100644 src/libecoprimer/sortmatch.c create mode 100644 src/libecoprimer/sortword.P create mode 100644 src/libecoprimer/sortword.c create mode 100644 src/libecoprimer/strictprimers.P create mode 100644 src/libecoprimer/strictprimers.c diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..9b213b0 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,58 @@ +EXEC=ecoPrimer + +PRIMER_SRC= ecoprimer.c +PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC)) + + +SRCS= $(PRIMER_SRC) + +LIB= -lecoPCR -lecoprimer -lz -lm + +LIBFILE= libecoPCR/libecoPCR.a \ + libecoprimer/libecoprimer.a + + +include global.mk + +all: $(EXEC) + + +######## +# +# ecoPrimer compilation +# +######## + +# executable compilation and link + +ecoPrimer: $(PRIMER_OBJ) $(LIBFILE) + $(CC) $(LDFLAGS) -O5 -m64 -fast -o $@ $< $(LIBPATH) $(LIB) + + +######## +# +# library compilation +# +######## + +libecoPCR/libecoPCR.a: + $(MAKE) -C libecoPCR + +libecoprimer/libecoprimer.a: + $(MAKE) -C libecoprimer + + +######## +# +# project management +# +######## + +clean: + rm -f *.o + rm -f $(EXEC) + $(MAKE) -C libecoPCR clean + $(MAKE) -C libecoprimer clean + + + \ No newline at end of file diff --git a/src/ecoPrimer b/src/ecoPrimer new file mode 100755 index 0000000000000000000000000000000000000000..13716c122e12ae6e4bfea9dcfa533af903040fa0 GIT binary patch literal 62448 zcmeFadwf*Y)i*vN6C5xwqb8avlu;)dG-9H}CIlWcFasxcz$l<7P!f|MsR3d#BX}j6 z1U)^BjlMjswYJ69s(ou8Z69liwVDthV68@M6{S|{rJgZf@CJCD_xs(K$;l9qKJWYa z{Cl`#|?79yPjFj%r=UU{_yu0%>z-(o-FXfA zkx1p7Z-+yb+6O$O%2uh;fO+kiAydlbq(zy9PEhVQ0Qz^iTz))&9l11#iFnRWEuO7I z>Z}(Y4Y~SXa=FeO@r4B?uablHEW(F%g34T(BdGO^!MdaV*Gwv&1Qj$~h8&hJ z#V1uhW90$m=U&|stzSNAdJ1V6JlVQK`CMLGyQIaz3us?oUu)oN4ScPEuQl+s2ENw7 z|Bp5BU6*0LVYI!z$FQ#V81b0T{IqRTM$fC6M#cw*`Rst(XzTG9ZDp?5Uf$VnBfvBJ z>}8m)eH5G>FEv(G-aZ&vBj zrAv`iFa+wG6_Q0b{{<9bVwkh4dc6~nvs8S~ElwJ3Z&%yjLIsTYb%6zj844`0cVA90 z_XiEcc+)U@9%pcBC;8aQIu}(kta&ILFR?_Ce;QV4akssO`E8r47lK7Q*}3L|dXW)d z6!01GXuxlnuh=)R5Yp?AhO9^+FJx7DD$Q=oa6kUs?8|&1bKAam!R5tHIhsQFE;mNQe zS7m&0o)rz`px_6F_q1*+T)}i(Wjs7iV1j3X70$~HkNd!TR4Cy2!26wTqT~nOOUC?u zwabOaP!gc52L*I#_lGPPuq#TS$oRC?Y&vr*)ISwc(r(GmK1t?Qr*glETq}llm^K&B zin}SZJqiy$@SYY5xIggvA_18Ev@+ywq*YOprQU)bvMvN``|tI9=nDn%Q0!=3%-({> z*4cRU8JYrO{2s7jp;~xk2+Zi$I0=pwm#GNk! zwX<9zio3VYV3-{uc|W40oK_k`U@rQ zg~V?}HyYNp(B58qOdZzGx?``0&6RZnKSxhIKMtdhUtERWce*e!_VxdT>1lD&UV+3R zfAHc;^qP3?1e6P2gl>}Rj8F6*f%><-orlJRnU#ztCA3x7?_g?n4xAl?g&)}5u{U-G zD7;K7ikD18G3c-BX_i`X3`uOx3z>TyMuMK$7Yby51e-;@yc1ec6f%j#u)?lN>tZxy z(x?SJcZC(*~hbU}i z22J;RZ^uGlB*x$?7+=d881XT$LH|&|C@26m4{qFs>cFzI&2FP-_Xu;V{llZ^gf-8xxX^AqBLX~IpL1&-u54$WJ$~ zSFo%nIvV^!-q5xn^xgJp#tqxF{)Lo~E7?oQ$`|B#Z@QYwOKuI9M++x=Z#oMQ$EJ^x zs4}ZvKHcNJ>2r7&r^UqGSA{d@ZOK9VJsJCTFj-MC3}HV=4g4}y;Z_KX@kNZ_a*(Ms zKbmiTG1s{9Z4cCKwg=Xu1d_Xu81Yb`y02ZTA23>?8VO9E?sIUo4+YTLB72z__CtY2 zIqru7O`<#y;d_AyVy(Qvh1*-?9dFm+7W{dOA=4!yf%d>1xE0Yrdtd`@MMTgZxCghq zQ1*U#$J+yyN-nPW$YTFM(nG7av_StRw!>j&*5@S#hCRyvg z$;jwW?q1rtV5H0SBy{$F^SR+GXxw^;{2!^jUmc>%p=9d<-gbPBgrm~szJdEI9q`{A z_qxn0c$VdHxf<|Ug^zyD^lre0HGa%3RO+-@SbBOCZpQUE+Y1ifI?}HF4of^1Xyuc= zwnZ;w9f`uFC|qf7d6Z@$%G;$VI-6eKnv71j*Zci{fpf5|FyL)pKT7cV<;{Z6Nb=$G zs=x~)#si~I7Y{Nr;tvbUv_C&lv)f(+oD?sbX{p@@6l=XGX;|g(#!sOTeE$#eJY-1{ zu+Iu`!cer=+p!bsGZ@nu7<5X}_d_O}&Rv0i6o|y{3G6`{6j7eN;}dq&Gd^4k7V|J9 z$4hPjaj*>`G(q5GpC#nI9(pVJDRM&Qr{FwZJe(KE1CtPP9!3sJGo0B}Wq-Md;B4yp z5#$W&+Ws6luxAFIU$79;&ya?!b%C7_YA^;x_!%;v3GREhCpn?G+kBp(=a(ZVerZDI z7KQnRXfOzPn_z~@QhuckQE&U$BFzj9XvY;l>)`yJCciJq&l&2LffGVqvZX>!1b)z( z58Vo-d{`~o;z&XDG0}&^@aG-HSt%Wirw(#aJ!wDt7m`3YuWkQm@7gcikkQI->7XH2 zT49g*76#zP0UN2+E)v7!gbG1i5qgxEt95jvxZ!QjK!b*? zg_9%kYbRr@^|rqxn9nFR;&V$ym^cryHp=?ZJ|WZQ5Uxjg3`4mefv4){|Dtt_9^<9Vp;MQ{5z zAy}k*v>ea9H}8Wxkz;yUU(^HJ4DAmlyMt}}Gj8ZpWX_9+N?aJ_L*-RD%{L)N3YVYh zZTmzZ&I|YxXQ<&k58b;03V-S*kc^kCgVD<==cSjpT!{#LiDMv4OclNbUIJL|fo&>_ z@+MA{MLFWZC|rr7BnzlOE8aQ7A^B%L0 zE!qkK_KOQZ@rnI10wHrx!Vnc0KTgO!Ee`BQPe)PFA?cOtfFgWr?oW^h26rMs7n>}e z(;P1uf@7bJ48+}gFBX<_KWNd&RwHiwH7z)vdpF7lH(mn~likK0DrBsqu(j*Opifgl z`I1?JVNNI18oQBX7}k#l`J@aE5+CnVHA&or_=!;!>r{mDVU~}Vct}B@JqK|TTTmX> ze2s0lZx?h0-GrZ=7>|( zGv^#}vz#MhwO|Vr!a1T)El6RO_-pJm>RIA+52j(a2a|mRw?)j?7%s2{FiSJy%~keN z2*x_ZsZ}wH3zdBw+m#qVO(WK{JJ&$`cTAg3-H4Arg*_&ovxUllO^h@q_E%<(gt;6o z#*bx~F(c5gE(&+N6S9^S8s5-L$u0KF0lM0A(J&vfsm)E5^#5UVcgRfGx6PN1(K!li zX^tmer?NzZ9on3eau@C7jIO@T+i}F_$)xD`$r%-@?|W~$iB0htzM$O*=A@j78)dln zb|i)N=fQtMvvipuSlQ=DMGye%hhtqYeJV9%?|X->#m88}f)POc|AhEme}ks%)3Wg3 z#?6e_i@P0j0!)2?Tx#x2g7wr*_6T%S1P5fJ23!NSi4poG`wndWmfaLjXfr4pwp#_$ssbH!<)n@$)4Swo>%kog_N7jZ9R#+p8cMDA>^h}PnyS={Ts)g0voAsg!Tn6ZJ#jq{P_9OnyPV|; z$OnU(%RT{o5^F`4% zqG8Y=b{-0}?U#-49>)xXQRLV{j-nBM^+j4EL{AG^*@3Taf}qK@k)8B=@H$w+<-(F5 z+9~Kw>7(PF=t-z+OmZGY&h;tIXM%I$JZK}>=Y}J3xdtBU%R2TtInqii?8_Pnn3se< zMAAx7C0qjANrmh-zo4#cE|6g@@MEqC?l)@pQBR_tmFDw7^C^30l~!1Eo|W~RYXK#* zeu@N+`DFC0wrhlrq7XQ2{&Tju$4nUJOR{Hm6IWIkop4-$^+lyoeJ-TQnupAPiWR+g zpMb@=>4wL1J?B-9z@}$v%0T_2gGy)P5m*v+idd_IAj=xVZJ5nv0ucS zqT&2(IEa6cHPDdxe#HDVVyfEQKp%X~5POIIK^s*t|IF&_q5k(%>8Kx?0jsk1kHO9w zGace}Y>#4LK5GI5gmCvmxS;vsY;!ll_mpr8DO_^Qk*pqMgQABlSqk&g-mtS%t%qX$ z_fZ=2pn8>Vxgg9X{y3J|><7^paQoTpeA4aLA?Z2>h?p28pNUWX!pBCt4|a)(83vf_ zpc&@kSawJEn#mD_&eRqq!qVwMR5sK;BiJ*+qUk$`3&bjD#QZX1?h7V2+ds#k1z|8& z=l+#mA1)k8*9oE)y~#fNL`Y%O&O-?^j1Qv7e=snx@129@EvpM;ulZe~LAFIG)gR^> zwZ|Iq?|35dtRN_Lc?7wC*t=$f1x$RDF>&WYqsPwd1>Z%I@6F`Ppz{fOb^AvcuNH0u z;UvZj!#XE19t^d{fpw+CY9W@FSVj0sv_Q99pYKS{6)X4HUh%fKBFC`SVBAPXF}&rT zBT+9R>hZ|4|HL|4*XHR0xw61a78uV0cgX^NUEm|c6i^ijb@^G~S{8`s{#`yllh04W zbI#wZk%b|g&Ik&U-@-}~_d@sDzrtMzUm=Y3dCq3o z%ispX^=>*bTC-<3PbRb$oR0~l*e=7$yzE5@M#>y=ws_mW1RVq_Kfoi7_>({vjd4B% zKus>794QMi;)~t(YzV}j6=nmmZ`nH_C{V^pl$Vh}Xe3tbttv;C3^5-bwEWEX-KqHiy^8{imZpxE09fGd#|og@(uVehQrj0^ryG|C0At;}=e-$1a4Qib2==$SXGV6F$%l-gC9I7~0R`Jge2vvL+ zVbNd}?Mc8+)$JatFg{G(R-zqbuqEoY02Bx7wh0tdb^8H$%DOFtAkj*?qu;_qCJIsx zTlyb}a^eMKSF#NFUB3Zq83!Ko5lY|bZ17g(ab`lDaArae{stq1cuC<<+!sCs?i6t9 z?3}NhF?Jz1bATOOY8_F$3z2h)us^h&SPrtT+mdp^1*}QDzO7gqwf#JbtT))JIe{T$;#^goDDVg!^CF{m2FKQglH59yTPnzfJys@1 z^%-t-ee2q6_=-n92ASHq2n<>f*@yAzv(KG{s-eRg?A(Yt+kuSszBb$5_yPLswH|Xu zw*4D%>&BL?{d3-yn={<@9k??y6W6iz#DMn!CzidSU#D&kgDtW-K# zH<}+Y2}2K5;s>&s|AnOMI7pG)hJ_of;$a`o576E#N^h0{i#veHWOm1nH75%X&!ZJG zL`87UU=JYpW$2c)R6er+1}qVur#2C{^uCh#Y;+9g&$DnP1Lz{Z7MeSoh%Sw zfgiEJ-7FB#y;D9P&*wkJb9SY*$ddM8ShElteQIC<@tpEi?%2@-`O@%G^Mif^$488^ z{^&8~P43ux7(Os}>m`mzHkCKKqwmmv^`n8{I%oMpT*hDR38Rx#ls9=|M;a#Q#I0bC z48vc%W8i45FAMQ^PhQ!c=&s}@EGl8!vOhN9uzzf=0C~`V&6nnjsCJ2`{OssBp@DeG z#i*qH`;eH0j4`Z*p7PUUugk70`;_Jf`$tz6vvKt1y>OD{B`&x$QaT6D{TLG3N6nez z?PmDutmBu{j;Fl@z!jZtjTsHNWBck-?0kEyF`qXgw`@z)D4*?)WevdEQ~D|?H>fYk zpNZ@VXp$ok5TY#~MAG#I$V!h@^B$%;zXDS9&nzum6#MG|^3rc=dDno3Jxj`4A>>^_ ziQ1-}rQ|L74&aWxTHdPviM$ufh%AYuiyPlk-rv4^nDRb!<)FNO0}cC!=x)&O9e8eL zZMs6p`#k{E@3h|m?nr8R?_uu0tKa(~Lcixz-jh=DW*n}(9*4Yzl=n?~Npt+3CdzxP zl=oEtuIPmr7_;yIw!V6%w1~aml@eb>i#TTBlvKdc3Iz1N!?2Gxzb)kdKC02hO<}3; zAXeQ5eta1AQ3cVoeMCXSK2^$JD&)VE^0iGnP0K%~5)W`3K+8Yn|6cx6rTotz>6)36 z|Hik6kw17b`oq5SP%sAVq|TPXk7ND8jZV@{S~zy$c_I%5>~Ri!2NXGMdFuiKwuK8YA{Aft1{ zdD6)?8kY`Veh<+?E9xUvFb4~9a_VhAN+C^*8V3l-Ij&gB5DOVWo+sm423I&RIpn<0 zeA?cHSpoLSF#EY-ZqSP1L`+#xbRw9m@e{#N9_B5GKys43!Lq+c&p?P3w4xoZvY0D| z8P_7V5|%WCcjvrI9>szqO9cBR zw<75}N>sTo`ApB7S#5mi0Wqh*R{)Hp}MTR{@;$8rjWvYBA zmCbE65!1%q80y1$*fb24HwFs5@#7JZv4_Y8sI1+>Q(ry1vy6-__Av?MiG|eY)95ps zZMgw74k*~>+X4{PF*EHyiaP{wR2~ z%CoKsVa3ldpX*4*mRr{%Mv9%1nv16l!-p2h(}9PMKdkw_%G%9_)q*C+VYSF64=~F4Vbava&1&bQUeFkfKd^NKC2a*hn$wA&De%1=D^TI$?tw>DErRpW8doC}?((65 zun>Uw8%@@NCkR76d&18-B4!qL%Uf(JPS)*ZIN~f-*%t;9ocYzkKh+@}IKC4tUJTnC zx7nVKQc1MG)cM7s^5w4BKf@XCU>Z7#p?5rYED~&2i!*K~03A)0iPg|9aH2CxQQ8mh zm!cGVL^=YcFIpIa5zvsHh%~_AzL|o<14x1!{h)1bP1Jxc`ae#Ovb}Z=t}F(*nkeWOnd|QQqO7lwtr-D7`((bk|My$f}@mH=6n)X1h(_fh{n>)ki8|HH%^R00F`edc` z9}_p3{UP(qP#hJZ{ zqCwJ)JrsgU15mUd9-4#;xCiE^6VhV_2t~}E;F84UgVSKhV{S2?9hi>Oh#4RleK#>5 zl}!2H89b#3#vI&&@mWdq_B`B=GdC$+&7rFhPqwB+go3>tVUPhw2r*6xZrULXP2)o5 z8=;B2BXRc$p?Gyt9^p)#_Z0i5s)&)XizNo`!-0szc%gcpH>bXX*nE(u&fDHzV03J+ z@UGuFo0gAbAl~&o3=n)&g5TUWYERql3<`82yB3yo*5iodml@t4bbB^=J3d4>QsH^u z+wm4|#PjwpY%~P_1kfeEf;=GO`Xj$MpE2#W-A5R0`!l`mALE&TGY#&t{v3EhUBR-g z-nGACCLBDD*VhF?=miyQj|#&~KqesXByhvrL3?5$Fv z3`HTWpfM1dh;!k8fyC8VA`N7>VOt=>yLLUaW+eXt1NE*wiyG;8$J@}W!M-UZx z?}$fe&qTWxE!}Q~xw!+0Kf^5GtSf^w&60COGA#$mqXTFs`?JY>KISNyeVokxxB}V# zVD{gU-HCk=L>DY}G!pZJ;=PFms3qkupz)1p5nkt-I}9s(pK)T3akBdbqpSyA&ihEW zTF%n;z89t9lkuWS&I^V$*04qx=19XF-K)mSj=k1&cg#I;f3llfmFWMK=4b5xPY37E zHn#-l%{HH!KUd7(^E!3}`!XOE4wWo($Br=ViF@M9GG^0VE=Aw$cxN_NhrR!~2@n3W zyS?Ar@qLs=6btvlGt@~#?V9rJ_4-YrvR?0c6pQBJ^cZhW0@MjWXT)NKR4vj;luoS$>C-WmDwD zoB-wNDGZE&P~OKk?dv!0Xv2{I+-_r(8;4@I$MTGFz8wU$@MynxW}j4#`#FT`QsS3m zYg6{Y@F`;Mbo9NUcYKpC`VY9n@6Vm!E~+C!|LaRflf z=|@LKU%>uZrSSwL=7mw<#bUhc^U1(CF@7X5%6IckJhW}u#|C-#KY7oHoHac3ri5g2ICjO8D%|%`*`G>FC+0WpI*Yv^?oB` z(_nwWYM=3(4O4*81<_S9g15LTliihA7JS{@GhIwp;#q4!1l|Gbeevvb6swh z(fugNMMoQ_XL1p>=dDaA%ZGC?+V{}~GJf94k$5)=W8{_xGH~)j9>`#<*^vyIdxPfR zBHTfI43)zuYxE)7!Isqwu96_hkqIXqc(Y-KSW<{KN8%W&D-oBOXt}p&gnR8fF=Sz= z&lZD5roD?cJtGf|9?xA;4rII%1JhoQ+pd!ZX&-$qth?uBD940G@xIjz1PIY%ig$FH z{mD&z?i*R8Y;N8v%-X~;2}~{*sq-yqOXy&;e+t;dOESpf7$833$CrJXAm5vq0HqD; zTTLx6n-KFsoSP8mK7=;(%WE+11)Td=TTAkwsgSvkp|<%2aFGx7sbb+rCzZw0U;=G z9PfXEE(HeT%-+n!c}Z-9i1XKDjukb`eNd3bqKXSzk*^vRk^y`w^%(Y)eR!4Md`ViO z{5JPs2zXv;H1hOL5?kc!jVelQb zDh5G=(_y*ZBKJSx4Pdl z&F7gpJ=0D1&2u`kdtzC5|H`zJTgzMWnqNFG?(QHCRC|0GliUxHm{;TlH{OHCn1!DG zcUq4y9TfVwlx*<^MDAxO7uizr^aWC;?7g?7Khr7YM&uhB=5TzwU?_f^Q=SD|`eh3I|W_9FBOo^N%w zFV1dYgQp>A%`U{zSga1cf<>`xzP2lx7KVWt!x3MkrlBEpH3*0UFp%e5NL$>EGe(%d zilZ${P6r3RCc6dFU~vcgwiVomiQSQ$kuxCTDaD8u4URE*gKkDbU+q2YE4iVLRZ!(9 zeh0Hxj-uqMZ;9rDqsToSw`q>zju(N5^%m(Xcm(ns^l7^13Rk+nNSuHy(UMY2sIEMa zJ7xz4RnY%6=m$5Nte@;bZ-M%tAw9rS!jOI^3a*m{x$e;;3eu2D;guW_i3&YM!g%)* z!RSIH5wklLuE_o(8`@~=c4I_{y)VX@?ZGbkJ2`%spW+1<4lxL5Ud>GQ1V_0$%bv!- z7|+_XK#W?gD2u|9o;AyDSm*mWm3{h#=@=^qE~Jrc*ZJN@jtXwP6bSL@PfY#Bsv{7R zh2j-YRMvI}`D#E9;sv+wmXU}LLS^T>*Yu~K?{oGCF@ZLp!|QY#K~PMWIdSGn`(&|X z@nO(=NkyqWW+FHI8v~QsLHdJS8L>s5;#|#-K2(i9R58_lqX2y<;w{`Au}aoKZ1|&U zSY-{GfTx&62rAY&>H^o1qSj&XXId!hqnpn=I}Bk?&yxJ z^D0CT@!am@HaxmtWM<=T1Me`KjOPe%Mu&v=r=Wof-9A)F0mPYCUX|x=S)InqRzK9HCM6`2i!KcOB-rK3KLb_9m<8)MilN z@I=TbAd7cfi{B~UHE@=Jli^f&CD%bs_h~~={^xk|4JP4ln(CDCn%iD1o9vOJiPD&p3i5G=?-PY0Oyex>qSB5EX zN0o3stpaI+NGO8eT9AzwsYE;=c3QWhGoK6@M0}Y$5qCq?{uW;HnUvDHA}6bcA3Qif z|7CxU*;GH3odZw8Q5-Kq?8R8&c->m!Yi&|0BZ7%*$E%R`E4BF>_;=wJ576a8^c8#U zN$6WWP+DHwCmGYtY`=HyT!E2lY;?Dwjcr(&A&Z;*I5YJECdx1fPtdx)aHcixB)&w| zal3dJ&MV$K)5`J7{VhC7nX9Z%-2E+_Aj3AwGiZ~UnNv^V+wzrdoAEm!saElzRbW2V z!SeZq-kZ+@5kS`gDiXccd(*cD>1MzQfFOb(Xo!s#v=$eIt@=W%DcfGjeNf;QV9;ZM zJiH1q5B8Ki3l?tv(>gED9LK{S^W*`mqXL4H;JfM+YTmo{Inh4UepQ6=QZHcVc#i2* zH_wv4j`bW+UqChmAi01m#6Hrdqqyk>Wb>s|8l*nq@g=r?Aj}R3)3p=N?cL8|O9Gnw z9t(jEIuU|sbo;-`lABrb@66<)e9vom=cL!jTm-L(O(6T{z?P>Ya_^Dl*R%Y~q`X$< z!OP~}BJ)-=Pb~XT-DWT_;gm zdUWwdYYXy=yP;Yt!G4^X@!Vr1;hRbL4G@09N9s+_LSh6{qN+W00pCcjoEZoYUn>%5 zYfwJ8(MDNOV}!vz1SCHDO@WvRyak9Tam0(`+t4vy`)hHoKd}}8m25SvG;aqW3@H)B zf?Aq=Dzh${X=TH#x5p;1lhCByFzN1?Pug!$qFjV%74p3^{qU5*xJ~m1x*zKR81?9v z{($L%!!P;aka7wVck?NZ*m2evFF6Z!@A?&ri_^vMk*&Cg)w%47e(Fnh(Zn}_1Z`CS z@%Iw(6C~m_1^YM@L|zDaKazPbGcN`r7-T&5zGqUnSIhiAF@KfJ_t`JVeBhVdBl2@E zM8YGXT~||(KOnynd%1YxPh9wkkiJL~ijvUHD0G1IHvuUn{n40&;%G$TcBIa5%<=mR znANS0Sl^0RmwPaXkK@X}`8?xfuFh64kipU_eqTm}rD%!n@$3a9N0gZ2bDd zh{|}**2?&}cEcKNWk$>qX1R#QP#FxNzXl!*FM3iibLzte4Vza522!&;mQ~r2866oJ z)oqv~F#T%#6uT5W1BzGLAYN(5j_3sZR?4d5`HL9iyf-gJR+@dIW_g*JcnvH^uGV4` z0(?t@Z7(NvN}pHA#;CFEbGp;+*{FJ?Fqh6 zG`Rl2_KJGZqAKQZFGV&QG*N^U@j!Bmc$X8^z#4y)PLK;g_dYF9!jq(?%`euy-7JgUCk8g1_mxD#wo@OjG*u(8n3;V^&IA+9f znbsNf9`2tI8U4N;U~nTkBV6ZeoDKc=Q|NKxMfWk)qG+zk1n$86M{wiWC}?-0NXV?m z#EpUIusl1#59b#IffBz+B+D+0osw0beaD}LNB9H*FFeRen$0U+r;^Jx$o&c~qGqz0 z=0a6i8AC${MMc%=$e&2sZzAccAk}+B^~&|wdcDlw$^7ZeZ)d)_Q>~W`{+<>WM$Kof zi~SutV$<=vHas6`W_RLLBUT2nhS*(B{*#|HXZd2I)o+I(vwT(#+6#nT*h9dkhdIm7 z1q+O2SLW10nFhhqDViY8T;xf{>?*uqVPQmRpxO4U|%43~B4kOQ*)~p=C z-kg;aJ3`rtX6*-ViuE|ndS6JeK0~s;TC)BWNf#$dl67Hs4rk$T8{ z4NYDsiJc)5+2noUJWA0QmR20*m<~u-u`q|eu%C}`9t9?7-?Uv=#cQVtg=CWujZaxc z9dZwC6%gV+iCfJYc|rLJ4y*P=5;nIa=E50wdY{NLv4MpbD$&nFx!2=vgsHJRE|aEmlp({0jmG zH~ttTuw{jVKWI!YFk9C!ihCcKcA*e<6LuxwtYKqeSH~_B7FOjmcg997?aSJ3aI|NO za!tJxKSG2MbH@$G@}O<cV7>#u;JQgMcH^T z11Go?#t{F9^$6q)zJI7rqoI0=-optC)S~Cr5%yfzFy>+FyIGXGm+9l>EH_`yy&+dr{SF)pyzz6R?EU8B#GoP0 zK(|uwDSaJlWnhD|?OnHd6!uB5YV`C~f3*&Lxa#?VKXE-wD3+~zu@7qzIAwQ7`Q)2o z2~n+HR8|Zb+!xp{4nXl-6c6)T-+~caBg?bkw}v@YPZmgF;?8X73L3@i_z?CY&BqYbdq7t^u>O}%_G=o zK8^upxS~JhuQh=Az>WCzv)84X=NE5BcaSenSjS;+7rM)52SDwnyJ0x)n{k5fcw{{L zS=r9(v)cBLSaX6{PGbX@+fo}zsvX4p7P*gLZAa)!$u0i_OX4PGRCt5~W2 zg71k%R~Ns-AKUxN#=+QFK_2saGA@?@8^7sOu?^JA=h2oH)-)H86R z%#q*XrJ%*G=#^Xn#}Wa5Snp@D(Qe1MqZ;i+6=jyHhSQCJdtXHgtZ zRL6OZE^Jdk6dy0OGbB5w* zq239E87{C8eZg8iSaN~C@wOjFOSD$v_ySHpHDapMF&|H6TbGN4VG*^Ub$)cRXthU? z@EuQI9n{4e5zfdTUFZawy}cU|KdA8)R1b-S%UAmGQzyv793%(BV1@p5p!4F3^6{cn zPWcR1v- zLKChhc27c`|MrBg^Q?lD7TX^cbDma~)$J?vfnPH+UyGP;aD5b$AQ(xxop+R+#5{|_ z@^>2Zu+*0q=1;0nfn1^&E#d8W1Z0Ep4oU($9wEfb%|s$jveLYRh_AUr4Nc2U!Z#ov z&a~oOdcmREpyN7_ZtJd7TT1txhz>EsC-%B~aWBV`bHM;($?YI1Oax0xgU!}aN#T;S zF!!H_ikZFyCxC{v9Ig&K&G=zh8fBih<4vsnXyZxz6o6FwptZuqJDsTb!3E`C;YqFY zi@0KlpE32e+fYOhbLYuij`%aAt3+#-;>|d`zvvfjHwVJ^%@7u=w*I>=M>%o2*#5@j zkeKM)B@?K@@<^b_+x{quVDV%*rYG|EC@bTI6RdDfc^KCEG5N&Z6D%KQLYT1Qw^y+q zVJ*wSiN20wAyEXDCX|4T2|1>X6H;3b!_B9~Q?e>MLS?6W+fRchguOoNZRa5?^KU}7 zP+4JbEWxGLG02sYcg)0X{6=XYxy3>UV_uqo+F<*1<8nwIMBgkLVj;bMLNl4Y?6lZY z7{1aSM}n>}5B@gwm|H**$MA)DyiMw`wctj)EJpKa0X-o+XDx{xFaqJVs1>X;ahvxT zj^8-RS()4%D#J0zXHe$b*3^HAE@6AdvOmOL2_mawGqMs7;;R#FdK6j3OD;u%-<_8I zaWa_7u|)RA@ME-hR3-5mOw0URVxkbB0$l`0M)2wdO>4I}GGcznbJp=({$61a$5Hqr zThl;WyfE4IJ0f&Y`%_H%yC3Qtfc+JzG@5$+icrm-*X@0Pk3IB z=XLdA-BRl+4|jYw(}wYj7S?ga+s})S`wi=Z*CsnTjwE|B3*qRn>DRWYG_x=h+od+o zjndnKFZO#NI%=?gJQH_Cmk$}DANh1Y@bySCo;0CHwq6DoV&$VBJMCKrsI zj`0NE0UKv>ZxKszmDrW5oY?>Ecn$^dSZ|&8P>eK0?gnlt$EX2R?hcK{QNeEN6Z~% zlk-;nDV*`N;+H4+Rg>Q)5GOzXzsHX$qUoJrGTB_r~;1YQ@s+Bx+{qa+E+YeD1zjF)q(7zAS z3MptQzYXp9gVwxU7xa>-L618K5*lC#k}rkyCt>WZq&oQfyqEwzv0)tyLvC-w72`Ao z_lfZv>f?m{Y{UYGI5FOJA296L@ppLm7sKJe& zA`=zsR)bCx2taA2_-(BFnS%{Rk;?stK{fV*Fc$GK+>2k*T7=)N#d@glUOdU8`8$weyahN}XZCHV#SU0ySv$1?WIoVr=_t4-d8og_uhqi;}E9m2Vkc~dA?R~$X z&i2HCr%l!s*%9k>jsYV;ky&}%iUw8iyacbY?TX>x%3Se2Tl-27P4o8=EG0u6#$QAR zQY}Ui$P}`gJ>iKTh00I#-gG%ghstZ*-kZx&4(|_~-iXl)?+JbbSn-lI$HN!Wammx; z)$!|bnjP!Oe?W6!8`peGZ2?q;GWHsoEq?o952%1iDHhVb?WW+3CNJJTFo9mpFv=Hr zyzLj@InGqHbsJ?U>TRzP<$MOV5scUik@97p*t2b)V7t>08E8hsItCl@JYa(#uBSR` zx044vfLA1dd`(vIoqBoL<2Xv4Nx z9$VVmeyXba0@YwcjiiI;Sw)i?<}+4vw*9l`MGPYbAKIoN_R5C%d(n3MWxXr2?Isk* z$bl`jw?Rl2tK|9Rtq{eb*^YY$`!BlLRO}8w-Mut!RH#DqJZKFYfBZG=aoB3XZ#x_2 zkrDF>*ox1(#)DPgixBUiHbR+DC{N#Mo!b2UhT+t-1}B){3riEvLjWnCFtfE3Fv0P; z6X6r74&2X%Km+Es9VcwA8L?^Ce&pEaJtBzU2LK?I2O2JdZVRGO^OeLI+=GMd5EpHp zdkm0*8{Y)hjXmt{2<{_+sfa8l38E=Sa_aBdws_eNMw$2mV!&3$dDrfQ7Fo5#Gi;Zk zww06knP@C9N?d@=WIYR1Z(U)MR4Mlt;1V!N!jHzQ$P<%zF-1bB@RpEA?Yz#3pUhW^bad7>@8l5OcdqG&~&lsP4BI6VMv=&Z3-uU)1vka~2fX$?Xov@ms+dAGjH}&HP7&Up#-Ij(2EiT4Me=-G zauZf_+qykvPu}oRm~$Z3^z#wmggUug zQ{C8ef@TV!M$3H@7=|zyVsg8~dy9zRaGI~GcozasJS{r%0)#jX9xOaw;Cc3~ljj9O z&)T6A_u!`rgV|F1km8*v+HDKY?3ZI^xW>SHO5%KA$lMw{eR6>9VuZj4VMK)Q384wm zKsOA)BEbT2AfKXN<_&a7!IhuKG4aY|U$_jfT2b#}*AA_OJ$b6xF(y(8i98fR3;h%( zukm&J;Md#4&nC(CvA!#thXc!cpH@Q;)Ia4UA%@P ztt~X_Q=03dFozj2!%8<<>UxODZWbmRa~bi+1Y@C@lr>xrW+XY>2rvLfbGmX<~0f zi;-=DS@F%>;OXj~f5hH&7 z`{MgU@%^#*-Y3345#OJQ?=Qr6QhaA%-zJ%#DZaDB_YvZIl=wbUe2)>|Uh#dD_&!E_ z=Zf#+#P>JE_X*;Ay!f6VzE2e2CyVcV@qMcJo``Qu)yH1~DH3~rfLBW5J|7 z&uw|nBddwOJD&H;``@$7W_h2%`;GGc4BkH=@2}?l26F z`Nov|*;Df8`tvV!ot$5Ha-l!JWwgtmUpFPc=q&jKfPbT1P0bC<>zkJpk7}vE65mx4 zJWGIQjjCDR)Y!1Jp=L?#%GiqNsQi{CEi394&>c8#9D;))e3SFP|zYgVsZ;cu+Js=jfG|J3}JQ%Ae% zYgbBXCe>n?X>MNG>~GKon`>5F;jd|ITv=NaZCJSi6RnyRD^^DJqk4Z+O+&L^l#-OA zHJ3HkyOuWB*B|&)(gypc=K7@#t2Gfp_l(27M!Sk86}$Y9S}M(dQGIg@Xb5Ga=J`wM zqf)$@>-m3=YoqmbOCVuGoxi!>S)Ww7`qj0KvAW?)Evua$np-!eaY|%L{gj3&%~P(N za`u##DXXVMr?@Vxuc^boRhEifx%_HB|6lNBH7)i2Ns}hI&c9+brE#T-p}AxAD{AY9 z6dUcDs~q7Bg)>f&Fy)Sq8c=dWK)Q>llUsmnk7RkH$i zhw{Rn>YE4A=BU&#y+SgE3}_mqF~46RLZgFtP}G@JR_emS)=XNo zw4|Z+l?Fdd_CYg1B~}}aHP^dX>tIt;^UBo?PzicWeN%l+w8cL~xQNSFuB>xiR#PWh z_CP(THcZJqsE-5MdH6D@PE9lhZldRi8n8FFz?9jCCZ0XYaW>-+UBC3ik{OYmpgz71~pd#CRnRZz#o9bm11gqKg%U3pE4aHo( zynaR0FB}|y^-5B(q%PJ1x-g0u&Nq*5a54(P9`t*vjN zOCJ(p zmcia7_+zlnFFLz!QcK-xm#~)=_^0-?i0I0du(K81aB;a#_G_Efy$aqvT7$a5-NH0< zgive9)kKiYf z(;slO8*Insl`Eqz{m(P!taJ>BY_j?~*X8vqVlA$vHOm_suXbI5de*I6?rMoO)i-P4 z0BIcLQ+d>vsC0hIq-N+CNmM2)kU&q&9AdzLTLu*&a8uN38*5rx#3OXry1ErLb(aA* z)yg$meRY?qZVri;v@Azmm$j^Hj=C`Rw)jsu#ow~3rpblp@-avhv`lL9pW;7zqJP>n zf3a&>P0KRfrpO(JgYqIAv~)o~v7ji}3-pX2Mvk;}vFUM`1Dk`j8Vm+o=kUr8dj5ly zkFG?>5v@lwA-Mb-<@IpokZuq=6;}>=O*TDhJltT*{-fkOXn9RrhZX3h%T~tVtT7V7 z$bc6|%IKgPCv4Ed>WD%Aj~;mzgui(m*oST{}!11cFOOo6&rv^3Nqu#$>F-WkP3 zh#(L-$m;+(=yoBCLj(*+_5SFx8ZFGwSZ$E+XjctG1{KrLm~b7Wy#_-d5p&VGpw6+z zD0e4$9Rg2Yrr5MRral0*@aN*eClmbAk|g)3^m68^o{zjjIC^rEoWQR zi5jtWXQ1Kgo3BC*G=fxFb95O(T2Z|g_-;nEFp$xQp)V=Zf%fU>p)x2PwnZ>Ih|znc zw~J65YJgmSO>=Y2)uJ^HY(LSxoW+Fyc3?{_vRI2G=ej@;MJ1QU8XFZ^SFiT-Fl3q5(CO1DR;)t7;l!aM6QLQuP|r zziAW)6!l+)rX7qQMFcyH{`8C*MjB-u)8%yJLu5|ShO}d998klkByuP^!=r092k_U{ zC#OckBRiw>rJRqXQxgvbh5DgU{3R8vv&*&n+#Gr1%DCo2SH@~zhA;aF5BJZpeJMV6 zI~Kffb*OSWDa6fGd=_-b`~^D04P@6?pL-;erZ#L_iHqfFOt+}=DZ|p`-N+nUkt)vx z^SkibrpiAK$)lZjs@1(H^gOiJ61SUYlER@{AS=J!XqIlUX(n&Fy^^do#K zZkBStfqQX@61Qfq%W~64w;cf-oIG-kt%|$XqAtYazx3X(@P03l@FnoRKO*jSa)+dIq$oZ3rR%Z!5c#afGL^6UnWiuq zH`n2_K;_@7Glada&1xGdOevnZ&h6$a5MzsO6(orFFh2bXuWu+G3|sV%xEmdXZ8Wy( z`y*~et5Yv;vjfA+o((QtDSoaNzPG+ioPQ_Dllilmq z%(zf+08bF{D%!Hv;+;FvnT!wf3VtH#d{Y~g(xXiB+A`NFU;2>L;KB4p zdeqzgpGtaB+>1-m1DR2P;ZazPvHmt8h07HHG!LJOpGmyI_7vq(?MeFA;j>JYqYlJ1 zNM9f*`bC;P?dxHD_9(pPB^D}w67I%U#51YlTSrLkN_k#K&V~(Ao)xL`L*-#zd5Fta zVRX>;Y_;>gf!lt$(7v)DKdA(z`{` z`$f9^g2?|n`L*BOcCVDbOG=M^K~KfuOokr*bx!n&R0ySFQ)l7%K8*O8x`I{iIw_h{@eKMSNY*o zj=W(${dvJdlJ6;L`GtV#&r4N)3SY_9Hp0!Hq#Ns3cn8z#WaN`qg`&d~^5Pnd)2M0@ zr)A>?6AaM-i8l|Qg5OH{f972axYU`O*H!&wfJkn>@7rM_|?>%mot|l#+2w1nGXfgv2X-zV3`uqbOI@Bg?fD znU~TVd9KH2gUVmYJYFiFay^XC9+m$L^LWW|@Bq1TWbQ%z&FW234xFPNas>cQ#Yg8? z>RcsDldFnFE=Ns^v{~0<%rOcm-e2uTy|8z0tFG7AP-u~ki)ESO; zybk%xRQ@8JuTtXQh5Y+fzCSIW?fjS{|9|Uz)y}DSVXYhp*bZ-_d;#8Qs@xA!n4fa42TH{jDd*i3kC(uLGvx%3AqJ z7ni^j%K~B@Oe_mH*1?GP06zRp4_R3&T4_=^-J;eCH`m-(X*dh7x)lzW@ zy$!A(Lf;3hA5sUc_ysLqKT6@r`m4BV=s1?PE+;Krt;%i4q}*bBWP5D;tE78HnqG&} zRg0*iBb)^?GX)f!F^dXuXSm&NEY=NPnVI5V zQSo&zNv?6ZJSyc=^3B^IQ=dvbDs}19HT<=0wZsdl^bVCqRob~q=0Bw1MJoNPN|&j$ zTb1)DxJsq#RQkEf-=or}RC>Qk-&W;zt8}M&-m2ioRGOpG*HwO@f*)3}U%{m+y-Pj6 zN5R8u;p?YgYv5}Qe64}6HSo0tzSh9k8u(fRUu)oN4ScPEuQl+s28L>2Cl)lh?y9e3 z8hj)c)R$sLes;!#V(FH1^GW!62+MlBpN9N^1Lm^;KArgRB0L9QPMG)=ScqZXc?w^{ zd`A3ge0UKiel-DHgo(dF;V)A78fHH6vE?T&!o+vN#P|6mt(d~su*UD_4K60ccf!Qq zjYDV5yHVk5n9s<64?esI6W<9FzyD~Nce}#Zu*UBuGA@nZEg%UKzZ1Jw%)3|NYgpqq z@dg(Y@^`|-cj1Tpm^`BJHOyy1f4srPWH|h$V`bjc3SYw-e?hiDPvJY^Iz^i${tEm$ zSyX&yVH?{Cj|DuO{khJR{O2K`_NQUZzg5vBO!=KK>))#I>lMC+HGWkZz7r;XzryEP zWb)Ur#_vwUcf!Q)ERyoCRrng#_`B2aoiOp6iY5LX3SYw-zbOsh2@`*}!hb;FYgpqu z>pKSzoiOo>&XW9l6uyQvesvmuCrtcKh5v%Czk)S>MH;>nCVsyvpHTQ3*7y&l;X7gC zx1KHK|3u+y_;B%yuu#mhN8n5Z+h4+tns_k+c5f=x3228n;Ef&&Wvu7V2`e2sz&75qH~7b*Bo!uZ&~ zl=SXV@E!&KmxA{zc%y<{_sa5{6zowj<#qa(r;zXTCocFUC+q`E|1vpkd~3o64r$dQ zpY5w*J`3RE=M63<)Q=Np`*a#IuSMZ&nB|FI#T#5qi0_1nzq?ZASqfjnEKmG?e0XVm zCrtc@&XalfD0~fTe0?rJ<2zxe{?h$(7w9?lw*~NU{`sMN$$u>Ni>N;hYyLhdeoB5P zO!>PN{^<%|!-tFCukg=P_!>T3{M`zFvBKA|#(yYHekV-%JFz{+vJDDf!y3Oo4c`e9 zzxq^(&mZTb{xq!duS&z$a4X8QKW)2P;@_^|egzZX*}m!huM?(zTNVE43SYw-ze?@<5~hBgF!B4tlK)hNuVIbvvPMG+9HGZs7_!`#u zP>;9>&%u|5*P%S^vtom+|9>j@QU(8*NSqjY5oe<{HxOVJ7Ma(Q-ZnbYtnDRSe z;#XZR`QM`OHLUTg)9{@z@gHiC_;)IN4Qu??H2Iw{@r%AI@$Xak8rJwtY5biq@i#O| z{7nj9!x~?on;<*~UryNBe(B@=UqR2=e*XZ>_Df$6*nO(xpO1Ml{f&k-e;;pfF(H2^ zO!@Z|$h@-^zJ^&ofKL@ZyfnTOCVs_4nRl+j*RaONvWB=cz7sxF{VxDLXZ;re9@0(*U6H9*3nY`8rJ;LY(whrgsDHD!p~9o8rJwl zY4}c9=wIQVr0^>gtnpiUgNw-=d^us_ccYP+cb3A}FrTsg_u#{eF!41ULV5bb?z?3k z^ADxZ3sBzKo|gh1u05-iJ#A3(XqfaVj~^digei}o04~C;Pm{ubK;dhc`NZFV4==*R z-#`EtVdDEvllOmA_!?$D@qPI4B20WIOnkq>e^TLVSmXP7gNq6AoiOn$PM3MR6~2a9 zUdYcITug|+!2uJ0Hw>JQ-dFe<<}>2!_m&6~U&AA?uEp_h+y;sN4Z>*O-FHiPl7@dN zVd6X6J3XEaqkJ98=xZ+E;l`hbu9WodL4E*4HB9=H*UuYVOen7tW_|mcW!`TUzJ^); zaP-v_mH3Y-d<|=SeeR1e`8(l5)xQt)ob`VW@UZo-mHbc2QT11_=I;kzUYfsOKoS=9 zSLM%8`2GJT{vK8SY=y63&3~4IAM5Xg4^{s%&~w&51bDdmyQa&!@SlvZ{WYxlJL^mS zPMG!gEBw0@zJ@h^QJVZtnD|u+|K|!{!y5k^Y4}dqS^xC$`+m@K*8g{ahx2#+rLz8? zDE=DO{PlZHgjs(ld?@}$V4c~?{}{l-@h?*R7a^be*RbaQ;FYKWKCWVDE=DO{CB7Ecfyq4H%;RIPT^}<<2UgJ z7n3>oa>7D?{~`1KsPJ1Atnu~xY8qd|Pon%OZkOw=nG2k@bH-%dcRK zU!8{Ugo)pz%KuT}Ygps^)9{@z@jDg%UlhKEHU2|s_)eJk71dJy=M}z&HU5S)d?!r& z-3tE|g|A_a-BY^VdA?kllY%2d<|=S{k{$1Ir!4>m^>M; zR~=-1Y6QyjKNzpWx+QT>Q1Au?PXas^8G8O+pJ>DFnU~T{6|D+&a&##J9zMk(*Q?Q=D8HAz#Jf-g{1^X1dK$X}1 zvsS^nzeE*WQ7!G`dIf8LvR1=tytqxldi=Rl!9^q~MbT1G1R)}BL=hx$5kJgirbj1kjLC$kkk2qZV_f;{-}Dse>FQKfCs7d8O%M^p zU^K*kqM~sT#jHe85J3=BaN|Z@=|WL=f+%?IuRgCkVLClv;HB%lS9Ra-zTd5yb1hxh z*!%7SA1exj{-%e2UQOtHPcJ8J1Ol73vVeSdITv3(ElpHowGR~A z_h_Few)5EMhSgsFjbgL6{-oH>@4qWHd;C9&?R$@_&{s4b?E9k|72Ert&4zE2^4_J` z&ZoN++xKG+DYoy~V#Q}0^890VzmB(~itW5}T(Q}oPb;?X>&_{jSAF;h_>$lf%wqEI zOU6Nlzw_{q-u*v&_zw^N<>7xld^uhu6!X8z!`FKFdJk{%@U0%6^6;#Ow|V$p4=;K6 zVGpl(xW_nezaw7y;~sv>!^b>)(!-}be8$7)Jp8eTzwq!^f@x{ue?)r&I)>m`1X@d< zLpX*&e=Ghx!f}Kb5MD$$fk5l)Nd)2`dKuvrgjW$>LwEz>6vAnQHxb@KcpHJ%;CB$- zML2`-9>OySuOkra$NyCZYJi)9<|Cb^OVWPb)j|9W=2*AV>VT{{aE_Cf!GP^LQFNf+ z3*e_>wO5NdlMzY8$*!^5BTCY8 zq;s5yo0y)Ooxwf*I0jp@SD|eLf%w1*dKR-Jg zIFNrvVDb%I6D4o9CMwfYkdnm;5BS9N))~6D5xK@nzk9%~GDE1ubfR{KtafMGD!5hRX13Z@e&6h!fz!gEh`7}Vfh)7q5SCOyv$`8B$IRav zxE*1)+N#Bkl`2uLhJl;w!)->DTc0l%qa9J2f|6TO7Nf-o9YdU~_C$XDfN-w5J+&^V z)Ip{l=;huB*%_$z>w(+*;P$xP(BDJ&Z5=jw*`K6tP1hsrEKi^837 zT%wmtDAEFG+#k&fwE$NpagxC*$i7X!)R&&lV$deq9?pS8xy{Vq&aJMv){#~No;9H4 zHST?ofmd4x7o+`9%Q^DGvdCT$9O8|36;!gVP74i&IP?>uvzU56>@?@a>%^`&S|use zT@=gSGUmzC!eGpsw9nTENnlx>9z54-UFT{B$%iJy;ALB1ja+VYdT!G`Obu2fWDLR436CZe|8!!Y&*NbTFaut;~^( zYfK22f)cr|2JtMI1^UqcQn1>ZiD@)yaE-t0%lBV79UK>qv;` zc0 zJ9ntgUSGAw!fv2Ssf&#%95T38{m>LY;5Rb>8)F*s26|lU>R9YO8XnsE~?-%SA8wBB``b z%1#I*I-BeWsIc&R-ot8>x5^`w9gLcHOQJwVk zDVN5&RM^na1{8}T?{L>8tCB!fhw{dDU}t>3&9=8a_V?b|IT(si*m@5=k`@ntH07A{ zqVY8$mqwM%P{5n3b!kzNMAC~pzO|jLO1obFNL4sM`AM{>(_B1a1!yJo)-#r3O(XL= zZyO=pwwn##AcD;KR5gC?B1Y9vPHGUId*2(S|A$o01W^Xw$$)G%`Rc^oMO5 zSzSh~`0y#OD397e%rApBm)tCqohjBO|6Cg7o}rS}&W{n`( z$zix~kt8iuIO!J6RdV{~^E(Eg9EFR%@y0JiE{zJ}8J4{>bJ>iHsazdUjb93rMP(p! zt4zivo2hTC95iRnksVg8t$JND#EOY_zo^gtH&&-~hSy=bxX4pZqZkA|< LS +#include +#include +#include +#include +#include +#include + +#define VERSION "0.1" + /* TR: by default, statistics are made on species level*/ +#define DEFULTTAXONRANK "species" + +/* ----------------------------------------------- */ +/* printout help */ +/* ----------------------------------------------- */ +#define PP fprintf(stdout, + +static void PrintHelp() +{ + PP "------------------------------------------\n"); + PP " ecoPrimer Version %s\n", VERSION); + PP "------------------------------------------\n"); +} + +static void ExitUsage(int stat) +{ + PP "usage: ecoprimer [-d database] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-R rank] [-t taxon level]\n"); + PP "type \"ecoprimer -h\" for help\n"); + + if (stat) + exit(stat); +} + +#undef PP + +void initoptions(poptions_t options) +{ + options->lmin=0; //< Amplifia minimal length + options->lmax=0; //< Amplifia maximal length + options->error_max=3; //**< maximum error count in fuzzy search + options->primer_length=18; //**< minimal length of the primers + options->restricted_taxid=NULL; //**< limit amplification below these taxid + options->ignored_taxid=NULL; //**< no amplification below these taxid + options->prefix=NULL; + options->circular=0; + options->doublestrand=1; + options->strict_quorum=0.7; + options->strict_exclude_quorum=0.1; + options->sensitivity_quorum=0.9; + options->false_positive_quorum=0.1; + options->strict_three_prime=2; + options->r=0; + options->g=0; + options->no_multi_match=FALSE; + strcpy(options->taxonrank, DEFULTTAXONRANK); /*taxon level for results, species by default*/ +} + +void printcurrenttime () +{ + time_t now; + struct tm *ts; + char buf[80]; + + /* Get the current time */ + now = time(NULL); + + /* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */ + ts = localtime(&now); + strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts); + fprintf(stderr,"#%d#, %s\n",now, buf); + } + +void printcurrenttimeinmilli() +{ + struct timeval tv; + struct timezone tz; + struct tm *tm; + gettimeofday(&tv, &tz); + tm=localtime(&tv.tv_sec); + fprintf(stderr, " %d:%02d:%02d %d %d \n", tm->tm_hour, tm->tm_min, + tm->tm_sec, tv.tv_usec, tv.tv_usec/1000); +} + +/*TR: Added*/ +void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize) +{ + uint32_t i; + uint32_t wordsize = options->primer_length; + uint32_t quorumseqs; + double sens; + double speci; + float avg; + + quorumseqs = seqdbsize * 70 / 100; + + printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank); + + for (i=0; i < pairs.paircount; i++) + { + if (quorumseqs > pairs.pairs[i].inexample) continue; + sens = (pairs.pairs[i].taxsetindex*1.0)/rankdbstats*100; + speci = (pairs.pairs[i].oktaxoncount*1.0)/pairs.pairs[i].taxsetindex*100; + avg = (pairs.pairs[i].mind+pairs.pairs[i].maxd)*1.0/2; + + printf("P1\t%s", ecoUnhashWord(pairs.pairs[i].w1, wordsize)); + printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[i].w2, wordsize)); + printf("\t%d", pairs.pairs[i].inexample); + printf("\t%3.2f", sens); + printf("\t%3.2f", speci); + printf("\t%d", pairs.pairs[i].mind); + printf("\t%d", pairs.pairs[i].maxd); + printf("\t%3.2f\n", avg); + } +} + +/*updateseqparams: This function counts the insample and outsample sequences + * and with each sequences adds a tag of the taxon to which the sequence beongs*/ +void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, + poptions_t options, int32_t *insamples, int32_t *outsamples) +{ + uint32_t i; + int32_t taxid; + ecotx_t *tmptaxon; + + for (i=0;iisexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options); + if (seqdb[i]->isexample) + (*insamples)++; + else + (*outsamples)++; + + taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid; + tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid); + if (tmptaxon) + tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx); + if (tmptaxon) + seqdb[i]->ranktaxonid = tmptaxon->taxid; + } +} + +void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options) +{ + int32_t i; + + /*set taxon rank for which result is to be given*/ + for (i = 0; i < taxonomy->ranks->count; i++) + { + if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0) + { + options->taxonrankidx = i; + break; + } + } + + if (i == taxonomy->ranks->count) + { + fprintf(stderr,"\nUnknown taxon level: '%s'\n", options->taxonrank); + exit(0); + } +} +/* to get db stats, totals of species, genus etc....*/ +int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, + poptions_t options) +{ + uint32_t i; + uint32_t j; + uint32_t nameslots = 500; + uint32_t namesindex = 0; + int32_t *ranktaxonids = ECOMALLOC(nameslots * sizeof(int32_t), "Error in taxon rank allocation"); + int32_t taxid; + + ecotx_t *tmptaxon; + + for (i=0;itaxons->taxon[seqdb[i]->taxid].taxid; + tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid); + if (tmptaxon) + tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx); + if (tmptaxon) + { + for (j = 0; j < namesindex; j++) + { + if (tmptaxon->taxid == ranktaxonids[j]) break; + } + if (j < namesindex) continue; /* name is already in list, so no need to add it*/ + + if (namesindex == nameslots) + { + nameslots += 500; + ranktaxonids = ECOREALLOC(ranktaxonids, nameslots * sizeof(int32_t), "Cannot allocate pair rank taxon table"); + } + ranktaxonids[namesindex] = tmptaxon->taxid; + namesindex++; + } + } + ECOFREE(ranktaxonids, "free rank taxon table"); + + return namesindex; +} + +void setoktaxforspecificity (ppairscount_t pairs) +{ + uint32_t i; + uint32_t j; + uint32_t k; + uint32_t l; + int taxcount; + int32_t taxid; + + for (i = 0; i < pairs->paircount; i++) + { + for (j = 0; j < pairs->pairs[i].taxsetindex; j++) + { + for (k = 0; k < pairs->pairs[i].taxset[j].amplifiaindex; k++) + { + taxid = 0; + taxcount = 0; + for (l = 0; l < pairs->pairs[i].ampsetindex; l++) + { + /*compare only char pointers because for equal strings we have same pointer in both sets*/ + if (pairs->pairs[i].taxset[j].amplifia[k] == pairs->pairs[i].ampset[l].amplifia) + { + if (pairs->pairs[i].ampset[l].seqidindex > 1) + { + taxcount += pairs->pairs[i].ampset[l].seqidindex; + break; + } + + if (taxid != pairs->pairs[i].ampset[l].taxonids[0]) + { + if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0]; + taxcount++; + } + + if (taxcount > 1) break; + } + } + if (taxcount == 1) pairs->pairs[i].oktaxoncount++; + } + } + } +} + +int main(int argc, char **argv) +{ + pecodnadb_t seqdb; /* of type ecoseq_t */ + uint32_t seqdbsize=0; + ecotaxonomy_t *taxonomy; + + options_t options; + int carg; + int32_t errflag=0; + + int32_t insamples=0; + int32_t outsamples=0; + uint32_t i; + + pwordcount_t words; + pprimercount_t primers; + pairscount_t pairs; + + int32_t rankdbstats = 0; + + //printcurrenttime(); + //return 0; + + initoptions(&options); + + while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) { + + switch (carg) { + /* -------------------- */ + case 'd': /* database name */ + /* -------------------- */ + options.prefix = ECOMALLOC(strlen(optarg)+1, + "Error on prefix allocation"); + strcpy(options.prefix,optarg); + break; + + /* -------------------- */ + case 'h': /* help */ + /* -------------------- */ + PrintHelp(); + exit(0); + break; + + /* ------------------------- */ + case 'l': /* min amplification lenght */ + /* ------------------------- */ + sscanf(optarg,"%d",&(options.lmin)); + break; + + /* -------------------------- */ + case 'L': /* max amplification lenght */ + /* -------------------------- */ + sscanf(optarg,"%d",&(options.lmax)); + break; + + /* -------------------- */ + case 'e': /* error max */ + /* -------------------- */ + sscanf(optarg,"%d",&(options.error_max)); + break; + + + /* ------------------------ */ + case '3': /* three prime strict match */ + /* ------------------------ */ + sscanf(optarg,"%d",&(options.strict_three_prime)); + break; + + /* -------------------- */ + case 'q': /* strict matching quorum */ + /* -------------------- */ + sscanf(optarg,"%f",&(options.strict_quorum)); + break; + + /* -------------------- */ + case 's': /* strict matching quorum */ + /* -------------------- */ + sscanf(optarg,"%f",&(options.sensitivity_quorum)); + break; + + /* -------------------- */ + case 't': /* required taxon level for results */ + /* -------------------- */ + strncpy(options.taxonrank, optarg, 19); + options.taxonrank[19] = 0; + break; + + /* -------------------- */ + case 'x': /* strict matching quorum */ + /* -------------------- */ + sscanf(optarg,"%f",&(options.false_positive_quorum)); + break; + + /* ---------------------------- */ + case 'D': /* set in double strand mode */ + /* ---------------------------- */ + options.doublestrand=1; + break; + + /* ---------------------------- */ + case 'S': /* set in single strand mode */ + /* ---------------------------- */ + options.doublestrand=0; + break; + + /* ---------------------------- */ + case 'U': /* set in single strand mode */ + /* ---------------------------- */ + options.no_multi_match=TRUE; + break; + + /* ------------------------------------------ */ + case 'r': /* stores the restricting search taxonomic id */ + /* ------------------------------------------ */ + options.restricted_taxid = ECOREALLOC(options.restricted_taxid,sizeof(int32_t)*(options.r+1), + "Error on restricted_taxid reallocation"); + sscanf(optarg,"%d",&(options.restricted_taxid[options.r])); + options.r++; + break; + + /* --------------------------------- */ + case 'i': /* stores the taxonomic id to ignore */ + /* --------------------------------- */ + options.ignored_taxid = ECOREALLOC(options.ignored_taxid,sizeof(int32_t)*(options.g+1), + "Error on excluded_taxid reallocation"); + sscanf(optarg,"%d",&(options.ignored_taxid[options.g])); + options.g++; + break; + + /* -------------------- */ + case 'c': /* sequences are circular */ + /* --------------------------------- */ + options.circular = 1; + break; + + case '?': /* bad option */ + /* -------------------- */ + errflag++; + } + + } + + fprintf(stderr,"Reading taxonomy database ..."); + taxonomy = read_taxonomy(options.prefix,0); + fprintf(stderr,"Ok\n"); + + setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/ + + fprintf(stderr,"Reading sequence database ...\n"); + + seqdb = readdnadb(options.prefix,&seqdbsize); + + fprintf(stderr,"Ok\n"); + fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize); + + updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples); + + rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options); + + fprintf(stderr,"Database is constituted of %5d examples\n",insamples); + fprintf(stderr," and %5d counterexamples\n",outsamples); + fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats); + + fprintf(stderr,"\nIndexing words in sequences\n"); + + printcurrenttimeinmilli(); + words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options); + printcurrenttimeinmilli(); + + fprintf(stderr,"\n Strict primer count : %d\n",words->size); + + if (options.no_multi_match) + { + (void)filterMultiStrictPrimer(words); + fprintf(stderr,"\n Strict primer with single match count : %d\n",words->size); + } + + + fprintf(stderr,"\n\n Primer sample : \n"); + for (i=0; isize); i++) + fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]); + + + fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n"); + for (i=0;istrictcount,"Free strict primer count table"); + + primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options); + + ECOFREE(words->words,"Free strict primer table"); + ECOFREE(words,"Free strict primer structure"); + fprintf(stderr,"\n\n Approximate repeats :%d \n", primers->size); + + fprintf(stderr,"\n\n Primer sample : \n"); + for (i=0; isize); i++) + fprintf(stderr," + Primer : %s example sequence count : %5d counterexample sequence count : %5d status : %s\n",ecoUnhashWord(primers->primers[i].word,options.primer_length), + primers->primers[i].inexample, + primers->primers[i].outexample, + primers->primers[i].good ? "good":"bad"); + + fprintf(stderr,"\n"); + + /*TR: Added*/ + pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options); + setoktaxforspecificity (&pairs); + + printpairs (pairs, &options, rankdbstats, seqdbsize); + + + + ECOFREE(pairs.pairs,"Free pairs table"); + + return 0; +} diff --git a/src/global.mk b/src/global.mk new file mode 100644 index 0000000..94cf61d --- /dev/null +++ b/src/global.mk @@ -0,0 +1,20 @@ +MACHINE=MAC_OS_X +LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer +MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $< + +CC=gcc +CFLAGS= -W -Wall -O5 -m64 -fast -g +#CFLAGS= -W -Wall -O0 -m64 -g +#CFLAGS= -W -Wall -O5 -fast -g + +default: all + +%.o: %.c + $(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $< + +%.P : %.c + $(MAKEDEPEND) + @sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \ + rm -f $*.d; [ -s $@ ] || rm -f $@ + +include $(SRCS:.c=.P) \ No newline at end of file diff --git a/src/libecoPCR/Makefile b/src/libecoPCR/Makefile new file mode 100644 index 0000000..e2c1e3e --- /dev/null +++ b/src/libecoPCR/Makefile @@ -0,0 +1,30 @@ + +SOURCES = ecodna.c \ + ecoError.c \ + ecoIOUtils.c \ + ecoMalloc.c \ + ecorank.c \ + ecoseq.c \ + ecotax.c \ + ecofilter.c \ + econame.c + +SRCS=$(SOURCES) + +OBJECTS= $(patsubst %.c,%.o,$(SOURCES)) + +LIBFILE= libecoPCR.a +RANLIB= ranlib + + +include ../global.mk + + +all: $(LIBFILE) + +clean: + rm -rf $(OBJECTS) $(LIBFILE) + +$(LIBFILE): $(OBJECTS) + ar -cr $@ $? + $(RANLIB) $@ diff --git a/src/libecoPCR/ecoError.P b/src/libecoPCR/ecoError.P new file mode 100644 index 0000000..7c7ae71 --- /dev/null +++ b/src/libecoPCR/ecoError.P @@ -0,0 +1,15 @@ +ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoError.c b/src/libecoPCR/ecoError.c new file mode 100644 index 0000000..00bbfa2 --- /dev/null +++ b/src/libecoPCR/ecoError.c @@ -0,0 +1,26 @@ +#include "ecoPCR.h" +#include +#include + +/* + * print the message given as argument and exit the program + * @param error error number + * @param message the text explaining what's going on + * @param filename the file source where the program failed + * @param linenumber the line where it has failed + * filename and linenumber are written at pre-processing + * time by a macro + */ +void ecoError(int32_t error, + const char* message, + const char * filename, + int linenumber) +{ + fprintf(stderr,"Error %d in file %s line %d : %s\n", + error, + filename, + linenumber, + message); + + abort(); +} diff --git a/src/libecoPCR/ecoIOUtils.P b/src/libecoPCR/ecoIOUtils.P new file mode 100644 index 0000000..fc34a70 --- /dev/null +++ b/src/libecoPCR/ecoIOUtils.P @@ -0,0 +1,15 @@ +ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoIOUtils.c b/src/libecoPCR/ecoIOUtils.c new file mode 100644 index 0000000..73fb812 --- /dev/null +++ b/src/libecoPCR/ecoIOUtils.c @@ -0,0 +1,122 @@ +#include "ecoPCR.h" +#include +#include + +#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \ + (((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF)) + + +int32_t is_big_endian() +{ + int32_t i=1; + + return (int32_t)((char*)&i)[0]; +} + + + + +int32_t swap_int32_t(int32_t i) +{ + return SWAPINT32(i); +} + + +/** + * Read part of the file + * @param *f the database + * @param recordSize the size to be read + * + * @return buffer + */ +void *read_ecorecord(FILE *f,int32_t *recordSize) +{ + static void *buffer =NULL; + int32_t buffersize=0; + int32_t read; + + if (!recordSize) + ECOERROR(ECO_ASSERT_ERROR, + "recordSize cannot be NULL"); + + read = fread(recordSize, + 1, + sizeof(int32_t), + f); + + if (feof(f)) + return NULL; + + if (read != sizeof(int32_t)) + ECOERROR(ECO_IO_ERROR,"Reading record size error"); + + if (is_big_endian()) + *recordSize=swap_int32_t(*recordSize); + + if (buffersize < *recordSize) + { + if (buffer) + buffer = ECOREALLOC(buffer,*recordSize, + "Increase size of record buffer"); + else + buffer = ECOMALLOC(*recordSize, + "Allocate record buffer"); + } + + read = fread(buffer, + 1, + *recordSize, + f); + + if (read != *recordSize) + ECOERROR(ECO_IO_ERROR,"Reading record data error"); + + return buffer; +}; + + + + + +/** + * Open the database and check it's readable + * @param filename name of the database (.sdx, .rdx, .tbx) + * @param sequencecount buffer - pointer to variable storing the number of occurence + * @param abort_on_open_error boolean to define the behaviour in case of error + * while opening the database + * @return FILE type + **/ +FILE *open_ecorecorddb(const char *filename, + int32_t *sequencecount, + int32_t abort_on_open_error) +{ + FILE *f; + int32_t read; + + f = fopen(filename,"rb"); + + if (!f) + { + if (abort_on_open_error) + ECOERROR(ECO_IO_ERROR,"Cannot open file"); + else + { + *sequencecount=0; + return NULL; + } + } + + read = fread(sequencecount, + 1, + sizeof(int32_t), + f); + + if (read != sizeof(int32_t)) + ECOERROR(ECO_IO_ERROR,"Reading record size error"); + + if (is_big_endian()) + *sequencecount=swap_int32_t(*sequencecount); + + return f; +} + diff --git a/src/libecoPCR/ecoMalloc.P b/src/libecoPCR/ecoMalloc.P new file mode 100644 index 0000000..ea3767b --- /dev/null +++ b/src/libecoPCR/ecoMalloc.P @@ -0,0 +1,15 @@ +ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoMalloc.c b/src/libecoPCR/ecoMalloc.c new file mode 100644 index 0000000..f1a0d0f --- /dev/null +++ b/src/libecoPCR/ecoMalloc.c @@ -0,0 +1,94 @@ +#include "ecoPCR.h" +#include + +static int eco_log_malloc = 0; +static size_t eco_amount_malloc=0; +static size_t eco_chunk_malloc=0; + +void eco_trace_memory_allocation() +{ + eco_log_malloc=1; +} + +void eco_untrace_memory_allocation() +{ + eco_log_malloc=0; +} + +void ecoMallocedMemory() +{ + return eco_amount_malloc; +} + +void *eco_malloc(int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line) +{ + void * chunk; + + chunk = calloc(1,chunksize); + + + if (!chunk) + ecoError(ECO_MEM_ERROR,error_message,filename,line); + + eco_chunk_malloc++; + + if (eco_log_malloc) + fprintf(stderr, + "Memory segment located at %p of size %d is allocated (file : %s [%d])", + chunk, + chunksize, + filename, + line); + + return chunk; +} + +void *eco_realloc(void *chunk, + int32_t newsize, + const char *error_message, + const char *filename, + int32_t line) +{ + void *newchunk; + + newchunk = realloc(chunk,newsize); + + + if (!newchunk) + ecoError(ECO_MEM_ERROR,error_message,filename,line); + + if (!chunk) + eco_chunk_malloc++; + + if (eco_log_malloc) + fprintf(stderr, + "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])", + chunk, + newchunk, + newsize, + filename, + line); + + return newchunk; +} + +void eco_free(void *chunk, + const char *error_message, + const char *filename, + int32_t line) +{ + free(chunk); + + if (eco_log_malloc) + fprintf(stderr, + "Memory segment %p is released => %s (file : %s [%d])", + chunk, + error_message, + filename, + line); + + eco_chunk_malloc--; +} diff --git a/src/libecoPCR/ecoPCR.h b/src/libecoPCR/ecoPCR.h new file mode 100644 index 0000000..bd13942 --- /dev/null +++ b/src/libecoPCR/ecoPCR.h @@ -0,0 +1,270 @@ +#ifndef ECOPCR_H_ +#define ECOPCR_H_ + +#include +#include + +/***************************************************** + * + * Data type declarations + * + *****************************************************/ + +/* + * + * Sequence types + * + */ + +typedef struct { + + int32_t taxid; + char AC[20]; + int32_t DE_length; + int32_t SQ_length; + int32_t CSQ_length; /*what is this CSQ_length ? */ + + char data[1]; + +} ecoseqformat_t; + +typedef struct { + int32_t taxid; + int32_t SQ_length; + int32_t isexample; + char *AC; + char *DE; + char *SQ; + + int32_t ranktaxonid;/*TR: taxon id to which the sequence belongs*/ +} ecoseq_t, *pecoseq_t; + +/* + * + * Taxonomy taxon types + * + */ + + +typedef struct { + int32_t taxid; + int32_t rank; + int32_t parent; + int32_t namelength; + char name[1]; + +} ecotxformat_t; + +typedef struct ecotxnode { + int32_t taxid; + int32_t rank; + struct ecotxnode *parent; + char *name; +} ecotx_t; + +typedef struct { + int32_t count; + ecotx_t taxon[1]; +} ecotxidx_t; + + +/* + * + * Taxonomy rank types + * + */ + +typedef struct { + int32_t count; + char* label[1]; +} ecorankidx_t; + +/* + * + * Taxonomy name types + * + */ + +typedef struct { + int32_t is_scientificname; + int32_t namelength; + int32_t classlength; + int32_t taxid; + char names[1]; +} econameformat_t; + + + typedef struct { + char *name; + char *classname; + int32_t is_scientificname; + struct ecotxnode *taxon; +} econame_t; + + +typedef struct { + int32_t count; + econame_t names[1]; +} econameidx_t; + + + typedef struct { + ecorankidx_t *ranks; + econameidx_t *names; + ecotxidx_t *taxons; +} ecotaxonomy_t; + + +/***************************************************** + * + * Function declarations + * + *****************************************************/ + +/* + * + * Low level system functions + * + */ + +int32_t is_big_endian(); +int32_t swap_int32_t(int32_t); + +void *eco_malloc(int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line); + + +void *eco_realloc(void *chunk, + int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line); + +void eco_free(void *chunk, + const char *error_message, + const char *filename, + int32_t line); + +void eco_trace_memory_allocation(); +void eco_untrace_memory_allocation(); + +#define ECOMALLOC(size,error_message) \ + eco_malloc((size),(error_message),__FILE__,__LINE__) + +#define ECOREALLOC(chunk,size,error_message) \ + eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__) + +#define ECOFREE(chunk,error_message) \ + eco_free((chunk),(error_message),__FILE__,__LINE__) + + + + +/* + * + * Error managment + * + */ + + +void ecoError(int32_t,const char*,const char *,int); + +#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__) + +#define ECO_IO_ERROR (1) +#define ECO_MEM_ERROR (2) +#define ECO_ASSERT_ERROR (3) +#define ECO_NOTFOUND_ERROR (4) + + +/* + * + * Low level Disk access functions + * + */ + +FILE *open_ecorecorddb(const char *filename, + int32_t *sequencecount, + int32_t abort_on_open_error); + +void *read_ecorecord(FILE *,int32_t *recordSize); + + + +/* + * Read function in internal binary format + */ + +FILE *open_ecoseqdb(const char *filename, + int32_t *sequencecount); + +ecoseq_t *readnext_ecoseq(FILE *); + +ecorankidx_t *read_rankidx(const char *filename); + +econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy); + + + + /** + * Read taxonomy data as formated by the ecoPCRFormat.py script. + * + * This function is normaly uses internaly by the read_taxonomy + * function and should not be called directly. + * + * @arg filename path to the *.tdx file of the reformated db + * + * @return pointer to a taxonomy index structure + */ + +ecotxidx_t *read_taxonomyidx(const char *filename); + +ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName); + +ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid); + +ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx); + +int eco_isundertaxon(ecotx_t *taxon, int other_taxid); + +ecoseq_t *ecoseq_iterator(const char *prefix); + + + +ecoseq_t *new_ecoseq(); +int32_t delete_ecoseq(ecoseq_t *); +ecoseq_t *new_ecoseq_with_data( char *AC, + char *DE, + char *SQ, + int32_t taxid + ); + + +int32_t delete_taxon(ecotx_t *taxon); +int32_t delete_taxonomy(ecotxidx_t *index); + + +int32_t rank_index(const char* label,ecorankidx_t* ranks); + +//int32_t delete_apatseq(SeqPtr pseq); +//PatternPtr buildPattern(const char *pat, int32_t error_max); +//PatternPtr complementPattern(PatternPtr pat); +// +//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular); + +char *ecoComplementPattern(char *nucAcSeq); +char *ecoComplementSequence(char *nucAcSeq); +char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end); + +ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); + +int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid); +int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid); + +#endif /*ECOPCR_H_*/ diff --git a/src/libecoPCR/ecoapat.c b/src/libecoPCR/ecoapat.c new file mode 100644 index 0000000..284e579 --- /dev/null +++ b/src/libecoPCR/ecoapat.c @@ -0,0 +1,202 @@ +#include "../libapat/libstki.h" +#include "../libapat/apat.h" + +#include "ecoPCR.h" + +#include + +static void EncodeSequence(SeqPtr seq); +static void UpperSequence(char *seq); + +/* -------------------------------------------- */ +/* uppercase sequence */ +/* -------------------------------------------- */ + +#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z')) +#define TO_UPPER(c) ((c) - 'a' + 'A') + +void UpperSequence(char *seq) +{ + char *cseq; + + for (cseq = seq ; *cseq ; cseq++) + if (IS_LOWER(*cseq)) + *cseq = TO_UPPER(*cseq); +} + +#undef IS_LOWER +#undef TO_UPPER + + + + +/* -------------------------------------------- */ +/* encode sequence */ +/* IS_UPPER is slightly faster than isupper */ +/* -------------------------------------------- */ + +#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z')) + + + +void EncodeSequence(SeqPtr seq) +{ + int i; + UInt8 *data; + char *cseq; + + data = seq->data; + cseq = seq->cseq; + + while (*cseq) { + + *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0); + data++; + cseq++; + } + + for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++) + *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0); + + for (i = 0 ; i < MAX_PATTERN ; i++) + seq->hitpos[i]->top = seq->hiterr[i]->top = 0; + +} + +#undef IS_UPPER + + +SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular) +{ + int i; + + if (!out) + { + out = ECOMALLOC(sizeof(Seq), + "Error in Allocation of a new Seq structure"); + + for (i = 0 ; i < MAX_PATTERN ; i++) + { + + if (! (out->hitpos[i] = NewStacki(kMinStackiSize))) + ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation"); + + if (! (out->hiterr[i] = NewStacki(kMinStackiSize))) + ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation"); + } + } + + + out->name = in->AC; + out->seqsiz = out->seqlen = in->SQ_length; + out->circular = circular; + + if (!out->data) + { + out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8), + "Error in Allocation of a new Seq data member"); + out->datsiz= out->seqlen+circular; + } + else if ((out->seqlen +circular) >= out->datsiz) + { + out->data = ECOREALLOC(out->data,(out->seqlen+circular), + "Error during Seq data buffer realloc"); + out->datsiz= out->seqlen+circular; + } + + out->cseq = in->SQ; + + EncodeSequence(out); + + return out; +} + +int32_t delete_apatseq(SeqPtr pseq) +{ + int i; + + if (pseq) { + + if (pseq->data) + ECOFREE(pseq->data,"Freeing sequence data buffer"); + + for (i = 0 ; i < MAX_PATTERN ; i++) { + if (pseq->hitpos[i]) FreeStacki(pseq->hitpos[i]); + if (pseq->hiterr[i]) FreeStacki(pseq->hiterr[i]); + } + + ECOFREE(pseq,"Freeing apat sequence structure"); + + return 0; + } + + return 1; +} + +/* + +PatternPtr buildPattern(const char *pat, int32_t error_max) +{ + PatternPtr pattern; + int32_t patlen; + + pattern = ECOMALLOC(sizeof(Pattern), + "Error in pattern allocation"); + + pattern->ok = Vrai; + pattern->hasIndel= Faux; + pattern->maxerr = error_max; + patlen = strlen(pat); + + pattern->cpat = ECOMALLOC(sizeof(char)*patlen+1, + "Error in sequence pattern allocation"); + + strncpy(pattern->cpat,pat,patlen); + pattern->cpat[patlen]=0; + UpperSequence(pattern->cpat); + + if (!CheckPattern(pattern)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking"); + + if (! EncodePattern(pattern, dna)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding"); + + if (! CreateS(pattern, ALPHA_LEN)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling"); + + return pattern; + +} + +PatternPtr complementPattern(PatternPtr pat) +{ + PatternPtr pattern; + + pattern = ECOMALLOC(sizeof(Pattern), + "Error in pattern allocation"); + + pattern->ok = Vrai; + pattern->hasIndel= pat->hasIndel; + pattern->maxerr = pat->maxerr; + pattern->patlen = pat->patlen; + + pattern->cpat = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1), + "Error in sequence pattern allocation"); + + strcpy(pattern->cpat,pat->cpat); + + ecoComplementPattern(pattern->cpat); + + if (!CheckPattern(pattern)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking"); + + if (! EncodePattern(pattern, dna)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding"); + + if (! CreateS(pattern, ALPHA_LEN)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling"); + + return pattern; + +} +*/ diff --git a/src/libecoPCR/ecodna.P b/src/libecoPCR/ecodna.P new file mode 100644 index 0000000..b9a71b9 --- /dev/null +++ b/src/libecoPCR/ecodna.P @@ -0,0 +1,5 @@ +ecodna.o ecodna.P : ecodna.c /usr/include/string.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h ecoPCR.h \ + /usr/include/stdio.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h diff --git a/src/libecoPCR/ecodna.c b/src/libecoPCR/ecodna.c new file mode 100644 index 0000000..7d29a0e --- /dev/null +++ b/src/libecoPCR/ecodna.c @@ -0,0 +1,153 @@ +#include +#include "ecoPCR.h" + +/* + * @doc: DNA alphabet (IUPAC) + */ +#define LX_BIO_DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" + +/* + * @doc: complementary DNA alphabet (IUPAC) + */ +#define LX_BIO_CDNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZ#!][" + + +static char sNuc[] = LX_BIO_DNA_ALPHA; +static char sAnuc[] = LX_BIO_CDNA_ALPHA; + +static char LXBioBaseComplement(char nucAc); +static char *LXBioSeqComplement(char *nucAcSeq); +static char *reverseSequence(char *str,char isPattern); + + +/* ---------------------------- */ + +char LXBioBaseComplement(char nucAc) +{ + char *c; + + if ((c = strchr(sNuc, nucAc))) + return sAnuc[(c - sNuc)]; + else + return nucAc; +} + +/* ---------------------------- */ + +char *LXBioSeqComplement(char *nucAcSeq) +{ + char *s; + + for (s = nucAcSeq ; *s ; s++) + *s = LXBioBaseComplement(*s); + + return nucAcSeq; +} + + +char *reverseSequence(char *str,char isPattern) +{ + char *sb, *se, c; + + if (! str) + return str; + + sb = str; + se = str + strlen(str) - 1; + + while(sb <= se) { + c = *sb; + *sb++ = *se; + *se-- = c; + } + + sb = str; + se = str + strlen(str) - 1; + + if (isPattern) + for (;sb < se; sb++) + { + if (*sb=='#') + { + if (((se - sb) > 2) && (*(sb+2)=='!')) + { + *sb='!'; + sb+=2; + *sb='#'; + } + else + { + *sb=*(sb+1); + sb++; + *sb='#'; + } + } + else if (*sb=='!') + { + *sb=*(sb-1); + *(sb-1)='!'; + } + } + + return str; +} + +char *ecoComplementPattern(char *nucAcSeq) +{ + return reverseSequence(LXBioSeqComplement(nucAcSeq),1); +} + +char *ecoComplementSequence(char *nucAcSeq) +{ + return reverseSequence(LXBioSeqComplement(nucAcSeq),0); +} + + +char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end) +{ + static char *buffer = NULL; + static int32_t buffSize= 0; + int32_t length; + + if (begin < end) + { + length = end - begin; + + if (length >= buffSize) + { + buffSize = length+1; + if (buffer) + buffer=ECOREALLOC(buffer,buffSize, + "Error in reallocating sub sequence buffer"); + else + buffer=ECOMALLOC(buffSize, + "Error in allocating sub sequence buffer"); + + } + + strncpy(buffer,nucAcSeq + begin,length); + buffer[length]=0; + } + else + { + length = end + strlen(nucAcSeq) - begin; + + if (length >= buffSize) + { + buffSize = length+1; + if (buffer) + buffer=ECOREALLOC(buffer,buffSize, + "Error in reallocating sub sequence buffer"); + else + buffer=ECOMALLOC(buffSize, + "Error in allocating sub sequence buffer"); + + } + strncpy(buffer,nucAcSeq+begin,length - end); + strncpy(buffer+(length-end),nucAcSeq ,end); + buffer[length]=0; + } + + return buffer; +} + diff --git a/src/libecoPCR/ecofilter.P b/src/libecoPCR/ecofilter.P new file mode 100644 index 0000000..d46d3e0 --- /dev/null +++ b/src/libecoPCR/ecofilter.P @@ -0,0 +1,5 @@ +ecofilter.o ecofilter.P : ecofilter.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h diff --git a/src/libecoPCR/ecofilter.c b/src/libecoPCR/ecofilter.c new file mode 100644 index 0000000..8a7dbb1 --- /dev/null +++ b/src/libecoPCR/ecofilter.c @@ -0,0 +1,19 @@ +#include "ecoPCR.h" + +int eco_is_taxid_included( ecotaxonomy_t *taxonomy, + int32_t *restricted_taxid, + int32_t tab_len, + int32_t taxid) +{ + int i; + ecotx_t *taxon; + + taxon = eco_findtaxonbytaxid(taxonomy, taxid); + + for (i=0; i < tab_len; i++) + if ( (taxon->taxid == restricted_taxid[i]) || + (eco_isundertaxon(taxon, restricted_taxid[i])) ) + return 1; + + return 0; +} \ No newline at end of file diff --git a/src/libecoPCR/econame.P b/src/libecoPCR/econame.P new file mode 100644 index 0000000..4c9946c --- /dev/null +++ b/src/libecoPCR/econame.P @@ -0,0 +1,15 @@ +econame.o econame.P : econame.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \ + /usr/include/i386/signal.h /usr/include/i386/_structs.h \ + /usr/include/sys/_structs.h /usr/include/machine/_structs.h \ + /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/econame.c b/src/libecoPCR/econame.c new file mode 100644 index 0000000..835d79c --- /dev/null +++ b/src/libecoPCR/econame.c @@ -0,0 +1,61 @@ +#include "ecoPCR.h" +#include +#include + +static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy); + +econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy) +{ + + int32_t count; + FILE *f; + econameidx_t *indexname; + int32_t i; + + f = open_ecorecorddb(filename,&count,1); + + indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names"); + + indexname->count=count; + + for (i=0; i < count; i++){ + readnext_econame(f,(indexname->names)+i,taxonomy); + } + + return indexname; +} + +econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy) +{ + + econameformat_t *raw; + int32_t rs; + + raw = read_ecorecord(f,&rs); + + if (!raw) + return NULL; + + if (is_big_endian()) + { + raw->is_scientificname = swap_int32_t(raw->is_scientificname); + raw->namelength = swap_int32_t(raw->namelength); + raw->classlength = swap_int32_t(raw->classlength); + raw->taxid = swap_int32_t(raw->taxid); + } + + name->is_scientificname=raw->is_scientificname; + + name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name"); + strncpy(name->name,raw->names,raw->namelength); + name->name[raw->namelength]=0; + + name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname"); + strncpy(name->classname,(raw->names+raw->namelength),raw->classlength); + name->classname[raw->classlength]=0; + + name->taxon = taxonomy->taxons->taxon + raw->taxid; + + return name; +} + diff --git a/src/libecoPCR/ecorank.P b/src/libecoPCR/ecorank.P new file mode 100644 index 0000000..75e09b9 --- /dev/null +++ b/src/libecoPCR/ecorank.P @@ -0,0 +1,15 @@ +ecorank.o ecorank.P : ecorank.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \ + /usr/include/i386/signal.h /usr/include/i386/_structs.h \ + /usr/include/sys/_structs.h /usr/include/machine/_structs.h \ + /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecorank.c b/src/libecoPCR/ecorank.c new file mode 100644 index 0000000..4796088 --- /dev/null +++ b/src/libecoPCR/ecorank.c @@ -0,0 +1,52 @@ +#include "ecoPCR.h" +#include +#include + +static int compareRankLabel(const void *label1, const void *label2); + +ecorankidx_t *read_rankidx(const char *filename) +{ + int32_t count; + FILE *f; + ecorankidx_t *index; + int32_t i; + int32_t rs; + char *buffer; + + f = open_ecorecorddb(filename,&count,1); + + index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1), + "Allocate rank index"); + + index->count=count; + + for (i=0; i < count; i++) + { + buffer = read_ecorecord(f,&rs); + index->label[i]=(char*) ECOMALLOC(rs+1, + "Allocate rank label"); + strncpy(index->label[i],buffer,rs); + } + + return index; +} + +int32_t rank_index(const char* label,ecorankidx_t* ranks) +{ + char **rep; + + rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel); + + if (rep) + return rep-ranks->label; + else + ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found"); + + return -1; +} + + +int compareRankLabel(const void *label1, const void *label2) +{ + return strcmp((const char*)label1,*(const char**)label2); +} diff --git a/src/libecoPCR/ecoseq.P b/src/libecoPCR/ecoseq.P new file mode 100644 index 0000000..6222690 --- /dev/null +++ b/src/libecoPCR/ecoseq.P @@ -0,0 +1,19 @@ +ecoseq.o ecoseq.P : ecoseq.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/string.h /usr/include/zlib.h /usr/include/zconf.h \ + /usr/include/sys/types.h /usr/include/unistd.h \ + /usr/include/sys/unistd.h /usr/include/sys/select.h \ + /usr/include/sys/_select.h diff --git a/src/libecoPCR/ecoseq.c b/src/libecoPCR/ecoseq.c new file mode 100644 index 0000000..141db5d --- /dev/null +++ b/src/libecoPCR/ecoseq.c @@ -0,0 +1,227 @@ +#include "ecoPCR.h" +#include +#include +#include +#include +#include + +static FILE *open_seqfile(const char *prefix,int32_t index); + + +ecoseq_t *new_ecoseq() +{ + void *tmp; + + tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure"); + + return tmp; +} + +int32_t delete_ecoseq(ecoseq_t * seq) +{ + + if (seq) + { + if (seq->AC) + ECOFREE(seq->AC,"Free sequence AC"); + + if (seq->DE) + ECOFREE(seq->DE,"Free sequence DE"); + + if (seq->SQ) + ECOFREE(seq->SQ,"Free sequence SQ"); + + ECOFREE(seq,"Free sequence structure"); + + return 0; + + } + + return 1; +} + +ecoseq_t *new_ecoseq_with_data( char *AC, + char *DE, + char *SQ, + int32_t taxid_idx + ) +{ + ecoseq_t *tmp; + int32_t lstr; + tmp = new_ecoseq(); + + tmp->taxid=taxid_idx; + + if (AC) + { + lstr =strlen(AC); + tmp->AC=ECOMALLOC((lstr+1) * sizeof(char), + "Allocate sequence accession"); + strcpy(tmp->AC,AC); + } + + if (DE) + { + lstr =strlen(DE); + tmp->DE=ECOMALLOC((lstr+1) * sizeof(char), + "Allocate sequence definition"); + strcpy(tmp->DE,DE); + } + + if (SQ) + { + lstr =strlen(SQ); + tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char), + "Allocate sequence data"); + strcpy(tmp->SQ,SQ); + } + + tmp->isexample=1; + + return tmp; + +} + +/** + * ?? used ?? + **/ +FILE *open_ecoseqdb(const char *filename, + int32_t *sequencecount) +{ + return open_ecorecorddb(filename,sequencecount,1); +} + +ecoseq_t *readnext_ecoseq(FILE *f) +{ + char *compressed=NULL; + + ecoseqformat_t *raw; + ecoseq_t *seq; + int32_t comp_status; + unsigned long int seqlength; + int32_t rs; + + raw = read_ecorecord(f,&rs); + + if (!raw) + return NULL; + + if (is_big_endian()) + { + raw->CSQ_length = swap_int32_t(raw->CSQ_length); + raw->DE_length = swap_int32_t(raw->DE_length); + raw->SQ_length = swap_int32_t(raw->SQ_length); + raw->taxid = swap_int32_t(raw->taxid); + } + + seq = new_ecoseq(); + + seq->taxid = raw->taxid; + + seq->AC = ECOMALLOC(strlen(raw->AC) +1, + "Allocate Sequence Accesion number"); + strncpy(seq->AC,raw->AC,strlen(raw->AC)); + + + seq->DE = ECOMALLOC(raw->DE_length+1, + "Allocate Sequence definition"); + strncpy(seq->DE,raw->data,raw->DE_length); + + seqlength = seq->SQ_length = raw->SQ_length; + + compressed = raw->data + raw->DE_length; + + seq->SQ = ECOMALLOC(seqlength+1, + "Allocate sequence buffer"); + + seq->isexample=1; + + comp_status = uncompress((unsigned char*)seq->SQ, + &seqlength, + (unsigned char*)compressed, + raw->CSQ_length); + + if (comp_status != Z_OK) + ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data"); + + return seq; +} + +/** + * Open the sequences database (.sdx file) + * @param prefix name of the database (radical without extension) + * @param index integer + * + * @return file object + */ +FILE *open_seqfile(const char *prefix,int32_t index) +{ + char filename_buffer[1024]; + int32_t filename_length; + FILE *input; + int32_t seqcount; + + filename_length = snprintf(filename_buffer, + 1023, + "%s_%03d.sdx", + prefix, + index); + + + + if (filename_length >= 1024) + ECOERROR(ECO_ASSERT_ERROR,"file name is too long"); + + filename_buffer[filename_length]=0; + + input=open_ecorecorddb(filename_buffer,&seqcount,0); + + if (input) + fprintf(stderr,"# Reading file %s containing %d sequences...\n", + filename_buffer, + seqcount); + + return input; +} + +ecoseq_t *ecoseq_iterator(const char *prefix) +{ + static FILE *current_seq_file= NULL; + static int32_t current_file_idx = 1; + static char current_prefix[1024]; + ecoseq_t *seq; + + if (prefix) + { + current_file_idx = 1; + + if (current_seq_file) + fclose(current_seq_file); + + strncpy(current_prefix,prefix,1023); + current_prefix[1024]=0; + + current_seq_file = open_seqfile(current_prefix, + current_file_idx); + + if (!current_seq_file) + return NULL; + + } + + seq = readnext_ecoseq(current_seq_file); + + if (!seq && feof(current_seq_file)) + { + current_file_idx++; + fclose(current_seq_file); + current_seq_file = open_seqfile(current_prefix, + current_file_idx); + + + if (current_seq_file) + seq = readnext_ecoseq(current_seq_file); + } + + return seq; +} diff --git a/src/libecoPCR/ecotax.P b/src/libecoPCR/ecotax.P new file mode 100644 index 0000000..489fc97 --- /dev/null +++ b/src/libecoPCR/ecotax.P @@ -0,0 +1,15 @@ +ecotax.o ecotax.P : ecotax.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \ + /usr/include/i386/signal.h /usr/include/i386/_structs.h \ + /usr/include/sys/_structs.h /usr/include/machine/_structs.h \ + /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecotax.c b/src/libecoPCR/ecotax.c new file mode 100644 index 0000000..a0ade86 --- /dev/null +++ b/src/libecoPCR/ecotax.c @@ -0,0 +1,329 @@ +#include "ecoPCR.h" +#include +#include +#include + +static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon); + + /** + * Open the taxonomy database + * @param pointer to the database (.tdx file) + * @return a ecotxidx_t structure + */ +ecotxidx_t *read_taxonomyidx(const char *filename) +{ + int32_t count; + FILE *f; + ecotxidx_t *index; + int32_t i; + + f = open_ecorecorddb(filename,&count,1); + + index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1), + "Allocate taxonomy"); + + index->count=count; + for (i=0; i < count; i++){ + readnext_ecotaxon(f,&(index->taxon[i])); + index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent; + } + return index; +} + + +int32_t delete_taxonomy(ecotxidx_t *index) +{ + int32_t i; + + if (index) + { + for (i=0; i< index->count; i++) + if (index->taxon[i].name) + ECOFREE(index->taxon[i].name,"Free scientific name"); + + ECOFREE(index,"Free Taxonomy"); + + return 0; + } + + return 1; +} + + + +int32_t delete_taxon(ecotx_t *taxon) +{ + if (taxon) + { + if (taxon->name) + ECOFREE(taxon->name,"Free scientific name"); + + ECOFREE(taxon,"Free Taxon"); + + return 0; + } + + return 1; +} + + +/** + * Read the database for a given taxon a save the data + * into the taxon structure(if any found) + * @param *f pointer to FILE type returned by fopen + * @param *taxon pointer to the structure + * + * @return a ecotx_t structure if any taxon found else NULL + */ +ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon) +{ + + ecotxformat_t *raw; + int32_t rs; + + raw = read_ecorecord(f,&rs); + + if (!raw) + return NULL; + + if (is_big_endian()) + { + raw->namelength = swap_int32_t(raw->namelength); + raw->parent = swap_int32_t(raw->parent); + raw->rank = swap_int32_t(raw->rank); + raw->taxid = swap_int32_t(raw->taxid); + } + + taxon->parent = (ecotx_t*)(size_t)raw->parent; + taxon->taxid = raw->taxid; + taxon->rank = raw->rank; + + taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char), + "Allocate taxon scientific name"); + + strncpy(taxon->name,raw->name,raw->namelength); + + return taxon; +} + + +ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName) +{ + ecotaxonomy_t *tax; + char *filename; + int buffsize; + + tax = ECOMALLOC(sizeof(ecotaxonomy_t), + "Allocate taxonomy structure"); + + buffsize = strlen(prefix)+10; + + filename = ECOMALLOC(buffsize, + "Allocate filename"); + + snprintf(filename,buffsize,"%s.rdx",prefix); + + tax->ranks = read_rankidx(filename); + + snprintf(filename,buffsize,"%s.tdx",prefix); + + tax->taxons = read_taxonomyidx(filename); + + if (readAlternativeName) + { + snprintf(filename,buffsize,"%s.ndx",prefix); + tax->names=read_nameidx(filename,tax); + } + else + tax->names=NULL; + return tax; + +} + + + +int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy) +{ + if (taxonomy) + { + if (taxonomy->ranks) + ECOFREE(taxonomy->ranks,"Free rank index"); + + if (taxonomy->taxons) + ECOFREE(taxonomy->taxons,"Free taxon index"); + + ECOFREE(taxonomy,"Free taxonomy structure"); + + return 0; + } + + return 1; +} + +ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, + int32_t rankidx) +{ + ecotx_t *current_taxon; + ecotx_t *next_taxon; + + current_taxon = taxon; + next_taxon = current_taxon->parent; + + while ((current_taxon!=next_taxon) && // I' am the root node + (current_taxon->rank!=rankidx)) + { + current_taxon = next_taxon; + next_taxon = current_taxon->parent; + } + + if (current_taxon->rank==rankidx) + return current_taxon; + else + return NULL; +} + +/** + * Get back information concerning a taxon from a taxonomic id + * @param *taxonomy the taxonomy database + * @param taxid the taxonomic id + * + * @result a ecotx_t structure containing the taxonimic information + **/ +ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, + int32_t taxid) +{ + ecotx_t *current_taxon; + int32_t taxoncount; + int32_t i; + + taxoncount=taxonomy->taxons->count; + + for (current_taxon=taxonomy->taxons->taxon, + i=0; + i < taxoncount; + i++, + current_taxon++){ + if (current_taxon->taxid==taxid){ + return current_taxon; + } + } + + return (ecotx_t*)NULL; +} + +/** + * Find out if taxon is son of other taxon (identified by its taxid) + * @param *taxon son taxon + * @param parent_taxid taxonomic id of the other taxon + * + * @return 1 is the other taxid math a parent taxid, else 0 + **/ +int eco_isundertaxon(ecotx_t *taxon, + int other_taxid) +{ + ecotx_t *next_parent; + + next_parent = taxon->parent; + + while ( (other_taxid != next_parent->taxid) && + (strcmp(next_parent->name, "root")) ) + { + next_parent = next_parent->parent; + } + + if (other_taxid == next_parent->taxid) + return 1; + else + return 0; +} + +ecotx_t *eco_getspecies(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("species",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} + +ecotx_t *eco_getgenus(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("genus",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} + + +ecotx_t *eco_getfamily(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("family",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} + +ecotx_t *eco_getkingdom(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("kingdom",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} + +ecotx_t *eco_getsuperkingdom(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("superkingdom",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} diff --git a/src/libecoprimer/Makefile b/src/libecoprimer/Makefile new file mode 100644 index 0000000..d68202b --- /dev/null +++ b/src/libecoprimer/Makefile @@ -0,0 +1,34 @@ + +SOURCES = goodtaxon.c \ + readdnadb.c \ + smothsort.c \ + sortword.c \ + hashsequence.c \ + strictprimers.c \ + aproxpattern.c \ + merge.c \ + queue.c \ + libstki.c \ + sortmatch.c \ + pairs.c \ + apat_search.c + +SRCS=$(SOURCES) + +OBJECTS= $(patsubst %.c,%.o,$(SOURCES)) + +LIBFILE= libecoprimer.a +RANLIB= ranlib + + +include ../global.mk + + +all: $(LIBFILE) + +clean: + rm -rf $(OBJECTS) $(LIBFILE) + +$(LIBFILE): $(OBJECTS) + ar -cr $@ $? + $(RANLIB) $@ diff --git a/src/libecoprimer/apat.h b/src/libecoprimer/apat.h new file mode 100644 index 0000000..dd9ae06 --- /dev/null +++ b/src/libecoprimer/apat.h @@ -0,0 +1,120 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Dec. 94 */ +/* File: apat.h */ +/* Purpose: pattern scan */ +/* History: */ +/* 28/12/94 : ascan first version */ +/* 14/05/99 : last revision */ +/* ==================================================== */ + + +#ifndef H_apat +#define H_apat + + +#include "libstki.h" +#include "inttypes.h" +#include "../libecoPCR/ecoPCR.h" + + +/* ----------------------------------------------- */ +/* constantes */ +/* ----------------------------------------------- */ + +#ifndef BUFSIZ +#define BUFSIZ 1024 /* io buffer size */ +#endif + +#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */ + +#define ALPHA_LEN 4 /* alphabet length */ + /* *DO NOT* modify */ + +#define MAX_PATTERN 4 /* max # of patterns */ + /* *DO NOT* modify */ + +#define MAX_PAT_LEN 32 /* max pattern length */ + /* *DO NOT* modify */ + +#define MAX_PAT_ERR 32 /* max # of errors */ + /* *DO NOT* modify */ + +#define PATMASK 0x3ffffff /* mask for 26 symbols */ + /* *DO NOT* modify */ + +#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */ + /* *DO NOT* modify */ + + /* mask for position */ +#define ONEMASK 0x80000000 /* mask for highest position */ + + /* masks for Levenhstein edit */ +#define OPER_IDT 0x00000000 /* identity */ +#define OPER_INS 0x40000000 /* insertion */ +#define OPER_DEL 0x80000000 /* deletion */ +#define OPER_SUB 0xc0000000 /* substitution */ + +#define OPER_SHFT 30 /* shift */ + + /* Levenhstein Opcodes */ +#define SOPER_IDT 0x0 /* identity */ +#define SOPER_INS 0x1 /* insertion */ +#define SOPER_DEL 0x2 /* deletion */ +#define SOPER_SUB 0x3 /* substitution */ + + /* Levenhstein Opcodes masks */ +#define OPERMASK 0xc0000000 /* mask for Opcodes */ +#define NOPERMASK 0x3fffffff /* negate of previous */ + + + +/* ----------------------------------------------- */ +/* data structures */ +/* ----------------------------------------------- */ + + +typedef uint32_t pattern_t[ALPHA_LEN], *ppattern_t; + + /* -------------------- */ +typedef struct { /* pattern */ + /* -------------------- */ +int patlen; /* pattern length */ +int maxerr; /* max # of errors */ +uint32_t omask; /* oblig. bits mask */ +bool_t circular; /* is circular sequence */ +} patternParam_t, *ppatternParam_t; + + +/* ----------------------------------------------- */ +/* macros */ +/* ----------------------------------------------- */ + +#ifndef NEW +#define NEW(typ) (typ*)malloc(sizeof(typ)) +#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ)) +#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ)) +#define FREE(ptr) free((void *) ptr) +#endif + +/* ----------------------------------------------- */ +/* prototypes */ +/* ----------------------------------------------- */ + + /* apat_search.c */ + +int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat, + ppatternParam_t param, + StackiPtr stkpos); + +int32_t ManberSub(pecoseq_t pseq,ppattern_t pat, + ppatternParam_t param, + StackiPtr stkpos); + +int32_t ManberAll(pecoseq_t pseq,ppattern_t pat, + ppatternParam_t param, + StackiPtr stkpos); + + +#endif /* H_apat */ + diff --git a/src/libecoprimer/apat_parse.c b/src/libecoprimer/apat_parse.c new file mode 100644 index 0000000..c11b3a7 --- /dev/null +++ b/src/libecoprimer/apat_parse.c @@ -0,0 +1,65 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Mar. 92 */ +/* File: apat_parse.c */ +/* Purpose: Codage du pattern */ +/* History: */ +/* 00/07/94 : first version (stanford) */ +/* 00/11/94 : revised for DNA/PROTEIN */ +/* 30/12/94 : modified EncodePattern */ +/* for manber search */ +/* 14/05/99 : indels added */ +/* ==================================================== */ + +#include +#include +#include +#include + +#include "apat.h" +#include "ecoprimer.h" + + + /* IUPAC Dna */ +static int32_t sDnaCode[] = { + /* IUPAC */ + + 0x00000001 /* A */, 0x0000000E /* B */, 0x00000002 /* C */, + 0x0000000D /* D */, 0x00000000 /* E */, 0x00000000 /* F */, + 0x00000004 /* G */, 0x0000000B /* H */, 0x00000000 /* I */, + 0x00000000 /* J */, 0x0000000C /* K */, 0x00000000 /* L */, + 0x00000003 /* M */, 0x0000000F /* N */, 0x00000000 /* O */, + 0x00000000 /* P */, 0x00000000 /* Q */, 0x00000005 /* R */, + 0x00000006 /* S */, 0x00000008 /* T */, 0x00000008 /* U */, + 0x00000007 /* V */, 0x00000009 /* W */, 0x00000000 /* X */, + 0x0000000A /* Y */, 0x00000000 /* Z */ +}; + + +/* -------------------------------------------- */ +/* internal replacement of gets */ +/* -------------------------------------------- */ +static char *sGets(char *buffer, int size) { + + char *ebuf; + + if (! fgets(buffer, size-1, stdin)) + return NULL; + + /* remove trailing line feed */ + + ebuf = buffer + strlen(buffer); + + while (--ebuf >= buffer) { + if ((*ebuf == '\n') || (*ebuf == '\r')) + *ebuf = '\000'; + else + break; + } + + return buffer; +} + +/* -------------------------------------------- */ +/* Interface */ +/* -------------------------------------------- */ diff --git a/src/libecoprimer/apat_search.P b/src/libecoprimer/apat_search.P new file mode 100644 index 0000000..bfc181f --- /dev/null +++ b/src/libecoprimer/apat_search.P @@ -0,0 +1,17 @@ +apat_search.o apat_search.P : apat_search.c /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/string.h libstki.h ecotype.h apat.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + ../libecoPCR/ecoPCR.h diff --git a/src/libecoprimer/apat_search.c b/src/libecoprimer/apat_search.c new file mode 100644 index 0000000..0ed417a --- /dev/null +++ b/src/libecoprimer/apat_search.c @@ -0,0 +1,156 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Dec. 94 */ +/* File: apat_search.c */ +/* Purpose: recherche du pattern */ +/* algorithme de Baeza-Yates/Gonnet */ +/* Manber (agrep) */ +/* History: */ +/* 07/12/94 : first version */ +/* 28/12/94 : revised version */ +/* 14/05/99 : last revision */ +/* ==================================================== */ + + +#include +#include +#include + +#include "libstki.h" +#include "apat.h" + +#define POP PopiOut +#define PUSH(s,v) PushiIn(&(s),(v)) +#define TOPCURS CursiToTop +#define DOWNREAD ReadiDown + +#define KRONECK(x, msk) ((~x & msk) ? 0 : 1) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + + +/* -------------------------------------------- */ +/* Baeza-Yates/Manber algorithm */ +/* NoError */ +/* -------------------------------------------- */ +int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat, + ppatternParam_t param, + StackiPtr stkpos) +{ + int32_t pos; + uint32_t smask, r; + uint8_t *data; + int32_t end; + + end = (size_t)(pseq->SQ_length); + + if (param->circular) + end+=param->patlen - 1; + + + /* create local masks */ + smask = r = 0x1L << param->patlen; + /* init. scan */ + data = (uint8_t*)(pseq->SQ); + + /* loop on text data */ + for (pos = 0 ; pos < end ; pos++,data++) { + if (pos==pseq->SQ_length) + data=(uint8_t*)(pseq->SQ); + + if (*data < 4) + r = (r >> 1) & pat[*data]; + else + r=0; + + if (r & 0x1L) { + PUSH(stkpos, pos - param->patlen + 1); + } + + r |= smask; + } + return stkpos->top; /* aka # of hits */ +} + +/* -------------------------------------------- */ +/* Baeza-Yates/Manber algorithm */ +/* Substitution only */ +/* */ +/* Note : r array is stored as : */ +/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */ +/* */ +/* -------------------------------------------- */ +int32_t ManberSub(pecoseq_t pseq,ppattern_t pat, + ppatternParam_t param, + StackiPtr stkpos) +{ + int e, found; + int32_t pos; + uint32_t smask, cmask, sindx; + uint32_t *pr, r[2 * MAX_PAT_ERR + 2]; + uint8_t *data; + int32_t end; + + end = (size_t)(pseq->SQ_length); + + if (param->circular) + end+=param->patlen - 1; + + /* create local masks */ + r[0] = r[1] = 0x0; + + cmask = smask = 0x1L << param->patlen; + + for (e = 0, pr = r + 3 ; e <= param->maxerr ; e++, pr += 2) + *pr = cmask; + + cmask = ~ param->omask; // A VOIR !!!!! omask (new) doit tre composŽ de + et - ... Ancien omask : bits + + /* init. scan */ + data = (uint8_t*)(pseq->SQ); + + /* loop on text data */ + + for (pos = 0 ; pos < end ; pos++,data++) { + if (pos==pseq->SQ_length) + data=(uint8_t*)(pseq->SQ); + + sindx = (*data==4) ? 0:pat[*data]; + + for (e = found = 0, pr = r ; e <= param->maxerr ; e++, pr += 2) { + + pr[2] = pr[3] | smask; + + pr[3] = ((pr[0] >> 1) & cmask) /* sub */ + | ((pr[2] >> 1) & sindx); /* ident */ + + if (pr[3] & 0x1L) { /* found */ + if (! found) { + PUSH(stkpos, pos - param->patlen + 1); + } + found++; + } + } + } + + return stkpos->top; /* aka # of hits */ +} + + +/* -------------------------------------------- */ +/* Baeza-Yates/Manber algorithm */ +/* API call to previous functions */ +/* -------------------------------------------- */ +int32_t ManberAll(pecoseq_t pseq,ppattern_t pat, + ppatternParam_t param, + StackiPtr stkpos) +{ + if (param->maxerr == 0) + return ManberNoErr(pseq, + pat, param, + stkpos); + else + return ManberSub(pseq, + pat, param, + stkpos); +} + diff --git a/src/libecoprimer/aproxpattern.P b/src/libecoprimer/aproxpattern.P new file mode 100644 index 0000000..1b3ebf8 --- /dev/null +++ b/src/libecoprimer/aproxpattern.P @@ -0,0 +1,17 @@ +aproxpattern.o aproxpattern.P : aproxpattern.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h /usr/include/math.h /usr/include/architecture/i386/math.h diff --git a/src/libecoprimer/aproxpattern.c b/src/libecoprimer/aproxpattern.c new file mode 100644 index 0000000..d924281 --- /dev/null +++ b/src/libecoprimer/aproxpattern.c @@ -0,0 +1,236 @@ +/* + * aproxpattern.c + * + * Created on: 20 nov. 2008 + * Author: coissac + */ + + +#include "ecoprimer.h" +#include "apat.h" +#include + +static uint8_t encoder[] = {0, // A + 4, // b + 1, // C + 4,4,4, // d, e, f + 2, // G + 4,4,4,4,4,4,4,4,4,4,4,4, // h,i,j,k,l,m,n,o,p,q,r,s + 3,3, // T,U + 4,4,4,4,4}; // v,w,x,y,z + + +ppattern_t buildPatternFromWord(word_t word, uint32_t patlen) +{ + static pattern_t pattern; + uint32_t i; + + for (i = 0 ; i < ALPHA_LEN ; i++) + pattern[i] = 0x0; + + for (i=0;i < patlen; i++) + { + pattern[word & 3LLU] |= 1 << i; + word>>=2; + } + + return pattern; + +} + + +#ifdef IS_UPPER(c) +#undef IS_UPPER(c) +#endif + +/* -------------------------------------------- */ +/* encode sequence */ +/* IS_UPPER is slightly faster than isupper */ +/* -------------------------------------------- */ + +#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z')) + +void encodeSequence(ecoseq_t *seq) +{ + int i; + uint8_t *data; + char *cseq; + + data = (uint8_t*)(seq->SQ); + cseq = seq->SQ; + + for (i=0;iSQ_length;i++,data++,cseq++) + { + *data = encoder[(IS_UPPER(*cseq) ? *cseq - 'A' : 'Z')]; + } +} + +pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount, + pwordcount_t words,poptions_t options) +{ + pprimer_t data; + pprimercount_t primers; + ppattern_t pattern; + patternParam_t params; + uint32_t i; + uint32_t w; + uint32_t j; + Stacki positions; + uint32_t count=1; + uint32_t goodPrimers=0; + + uint32_t inSequenceQuorum; + uint32_t outSequenceQuorum; + bool_t conserved = TRUE; + + //poslist_t ttt; + + + inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum); + outSequenceQuorum = (uint32_t)floor((float)(seqdbsize-exampleCount) * options->false_positive_quorum); + + fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",inSequenceQuorum,exampleCount); + fprintf(stderr," Primers should not be present in more than %d/%d counterexample sequences\n",outSequenceQuorum,(seqdbsize-exampleCount)); + + data = ECOMALLOC(words->size * sizeof(primer_t), + "Cannot allocate memory for fuzzy matching results"); + + params.circular = options->circular; + params.maxerr = options->error_max; + params.omask = (1 << options->strict_three_prime) -1; + params.patlen = options->primer_length; + + positions.val=NULL; + + for (i=0,w=0; i < words->size; i++) + { + data[w].word=WORD(words->words[i]); + data[w].inexample = 0; + data[w].outexample= 0; + count = 1; + + if (conserved) + { + data[w].directCount=ECOMALLOC(seqdbsize * sizeof(uint32_t), + "Cannot allocate memory for primer position"); + data[w].directPos = ECOMALLOC(seqdbsize * sizeof(poslist_t), + "Cannot allocate memory for primer position"); + data[w].reverseCount=ECOMALLOC(seqdbsize * sizeof(uint32_t), + "Cannot allocate memory for primer position"); + data[w].reversePos = ECOMALLOC(seqdbsize * sizeof(poslist_t), + "Cannot allocate memory for primer position"); + } + + pattern = buildPatternFromWord(data[w].word,options->primer_length); + positions.val=NULL; + + for (j=0; j < seqdbsize && (count < 2 || !options->no_multi_match); j++) + { + positions.cursor=0; + positions.top =0; + if (!positions.val) + { + positions.size=1; + positions.val = ECOMALLOC(sizeof(uint32_t), + "Cannot allocate memory for primer position"); + } + + + count = ManberAll(database[j],pattern,¶ms,&positions); + data[w].directCount[j]=count; + + + if (count>1) + { + data[w].directPos[j].pointer = (uint32_t*)positions.val; + positions.val=NULL; + } + else + { + data[w].directPos[j].pointer=NULL; + if (count==1) + data[w].directPos[j].value = (uint32_t)*(positions.val); + } + + + } + + pattern = buildPatternFromWord(ecoComplementWord(data[w].word,options->primer_length), + options->primer_length); + + for (j=0; j < seqdbsize && (count < 2 || !options->no_multi_match); j++) + { + positions.cursor=0; + positions.top =0; + if (!positions.val) + { + positions.size=1; + positions.val = ECOMALLOC(sizeof(uint32_t), + "Cannot allocate memory for primer position"); + } + + count = ManberAll(database[j],pattern,¶ms,&positions); + data[w].reverseCount[j]=count; + + if (count>1) + { + data[w].reversePos[j].pointer = (uint32_t*)positions.val; + positions.val=NULL; + } + else + { + data[w].reversePos[j].pointer=NULL; + if (count==1) + data[w].reversePos[j].value = (uint32_t)*(positions.val); + } + + if (database[j]->isexample) + { + data[w].inexample+=(data[w].directCount[j] || data[w].reverseCount[j])? 1:0; + } + else + { + data[w].outexample+=(data[w].directCount[j] || data[w].reverseCount[j])? 1:0; + + } + + count+=data[w].directCount[j]; + } + + data[w].good = data[w].inexample >= inSequenceQuorum && data[w].outexample <= outSequenceQuorum; + goodPrimers+=data[w].good? 1:0; + + fprintf(stderr,"Primers %5d/%d analyzed => sequence : %s in %d example and %d counterexample sequences \r", + i+1,words->size,ecoUnhashWord(data[w].word,options->primer_length), + data[w].inexample,data[w].outexample); + + + conserved=data[w].inexample >= inSequenceQuorum; + conserved=conserved && (count < 2 || !options->no_multi_match); + + if (conserved) + w++; + } + + if (positions.val) + ECOFREE(positions.val,"Free stack position pointer"); + + if (!conserved) + { + ECOFREE(data[w].directCount,"Free direct count table"); + ECOFREE(data[w].directPos,"Free direct count table"); + ECOFREE(data[w].reverseCount,"Free direct count table"); + ECOFREE(data[w].reversePos,"Free direct count table"); + } + + fprintf(stderr,"\n\nOn %d analyzed primers %d respect quorum conditions\n",words->size,goodPrimers); + fprintf(stderr,"Conserved primers for further analysis : %d/%d\n",w,words->size); + + primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table"); + primers->primers=ECOREALLOC(data, + w * sizeof(primer_t), + "Cannot reallocate memory for fuzzy matching results"); + primers->size=w; + + return primers; +} diff --git a/src/libecoprimer/debug.h b/src/libecoprimer/debug.h new file mode 100644 index 0000000..48f473a --- /dev/null +++ b/src/libecoprimer/debug.h @@ -0,0 +1,29 @@ +/* + * debug.h + * + * Created on: 12 nov. 2008 + * Author: coissac + */ + +#ifndef DEBUG_H_ +#define DEBUG_H_ + +#include + + +#ifdef DEBUG + +#define DEBUG_LOG(message,...) { \ + char *text; \ + (void)asprintf(&text,(message),##__VA_ARGS__); \ + fprintf(stderr,"DEBUG %s (line %d) : %s\n",__FILE__,__LINE__,(text)); \ + free(text); \ + } + +#else + +#define DEBUG_LOG(message, ...) + +#endif + +#endif /* DEBUG_H_ */ diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h new file mode 100644 index 0000000..5d26573 --- /dev/null +++ b/src/libecoprimer/ecoprimer.h @@ -0,0 +1,238 @@ +/* + * epsort.h + * + * Created on: 6 nov. 2008 + * Author: coissac + */ + +#ifndef EPSORT_H_ +#define EPSORT_H_ + +#include +#include +#include +#include "ecotype.h" +#include "../libecoPCR/ecoPCR.h" +#include "apat.h" + +#define DEBUG +#include "debug.h" + +/**** + * Word format used : + * + * bit 63 : bad word -> this word should not be used + * bit 62 : multi word -> this word is not uniq in at least one seq + * bits 0-61 : hashed dna word of max size 31 pb + * code used for a : 00 + * code used for c : 01 + * code used for g : 10 + * code used for t : 11 + */ + +typedef uint64_t word_t, *pword_t; + +#define WORD(x) ((x) & 0x3FFFFFFFFFFFFFFFLLU) +#define WORD(x) ((x) & 0x3FFFFFFFFFFFFFFFLLU) + +#define ISBADWORD(x) (((x) & 0x8000000000000000LLU) >> 63) +#define SETBADWORD(x) ((x) | 0x8000000000000000LLU) +#define RESETBADWORD(x) ((x) & 0x7FFFFFFFFFFFFFFFLLU) + +#define ISMULTIWORD(x) (((x) & 0x4000000000000000LLU) >> 62) +#define SETMULTIWORD(x) ((x) | 0x4000000000000000LLU) +#define RESETMULTIWORD(x) ((x) & 0xBFFFFFFFFFFFFFFFLLU) + + +#define WORDMASK(s) ((1LLU << ((s) * 2)) -1) +#define LSHIFTWORD(x,s) (((x) << 2) & WORDMASK(s)) +#define RSHIFTWORD(x,s) (((x) & WORDMASK(s))>> 2) + +#define RAPPENDBASE(x,s,c) (LSHIFTWORD((x),(s)) | (word_t)(c)) +#define LAPPENDBASE(x,s,c) (RSHIFTWORD((x),(s)) | ((word_t)((~(c)) & 3) << (((s)-1) *2))) + + +#define ECO_ASSERT(x,message) if (!(x)) \ + { \ + fprintf(stderr,"Assertion Error in %s (line %d): %s\n", \ + __FILE__,\ + __LINE__,\ + message\ + ); \ + exit(ECO_ASSERT_ERROR); \ + } + +#define MINI(x,y) (((x) < (y)) ? (x):(y)) +#define MAXI(x,y) (((x) < (y)) ? (y):(x)) + +typedef struct { + pword_t words; + uint32_t *strictcount; + uint32_t inseqcount; + uint32_t outseqcount; + uint32_t size; +} wordcount_t, *pwordcount_t; + + +typedef union { + uint32_t *pointer; + uint32_t value; +} poslist_t, *ppostlist_t; + +typedef struct { + word_t word; + uint32_t *directCount; + ppostlist_t directPos; + + uint32_t *reverseCount; + ppostlist_t reversePos; + bool_t good; + uint32_t inexample; + uint32_t outexample; +} primer_t, *pprimer_t; + +typedef struct { + pprimer_t primers; + uint32_t size; +} primercount_t, *pprimercount_t; + +typedef struct { + word_t word; + uint32_t position; + bool_t strand; + bool_t good; /*TR: Added*/ +} primermatch_t, *pprimermatch_t; + +/*TR: Added*/ +typedef struct { + pprimermatch_t matches; + uint32_t matchcount; +} primermatchcount_t, *pprimermatchcount_t; + +typedef struct { + char *amplifia; + int32_t *taxonids; + uint32_t seqidcount; + uint32_t seqidindex; +} ampseqset_t, *pampseqset_t; + +typedef struct { + int32_t taxonid; + char **amplifia; + uint32_t amplifiacount; + uint32_t amplifiaindex; +} taxampset_t, *ptaxampset_t; + +typedef struct { + word_t w1; + word_t w2; + uint32_t inexample; /*inexample count*/ + uint32_t outexample; /*outexample count*/ + + uint32_t mind; + uint32_t maxd; + + uint32_t ampsetcount; + uint32_t ampsetindex; + pampseqset_t ampset; + + uint32_t taxsetcount; + uint32_t taxsetindex; + ptaxampset_t taxset; + + uint32_t oktaxoncount; +} pairs_t, *ppairs_t; + +/*TR: Added*/ +typedef struct { + ppairs_t pairs; + uint32_t paircount; +}pairscount_t, *ppairscount_t; + +typedef struct { + pword_t words; + uint32_t *count; + uint32_t push; + uint32_t pop; + uint32_t size; + bool_t empty; + bool_t full; +} queue_t, *pqueue_t; + +typedef struct { + pword_t words; + uint32_t *count; + uint32_t write; + uint32_t read1; + uint32_t read2; + uint32_t size; +} merge_t, *pmerge_t; + + +typedef struct { + uint32_t lmin; //**< Amplifia minimal length + uint32_t lmax; //**< Amplifia maximal length + uint32_t error_max; //**< maximum error count in fuzzy search + uint32_t primer_length; //**< minimal length of the primers + int32_t *restricted_taxid; //**< limit amplification below these taxid + int32_t *ignored_taxid; //**< no amplification below these taxid + char *prefix; + uint32_t circular; + uint32_t doublestrand; + float strict_quorum; + float strict_exclude_quorum; + float sensitivity_quorum; + float false_positive_quorum; + uint32_t strict_three_prime; + int32_t r; //**< count of restrited taxa (restricted_taxid array size) + int32_t g; //**< count of ignored taxa (ignored_taxid array size) + bool_t no_multi_match; + char taxonrank[20]; //TR to count ranks against a pair + int32_t taxonrankidx; //TR to count ranks against a pair +} options_t, *poptions_t; + +typedef ecoseq_t **pecodnadb_t; + +void sortword(pword_t table,uint32_t N); + + +pecodnadb_t readdnadb(const char *name, uint32_t *size); + +int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options); + +uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq); +pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size); +uint32_t ecoCompactHashSequence(pword_t dest,uint32_t size); +const char* ecoUnhashWord(word_t word,uint32_t size); +word_t ecoComplementWord(word_t word,uint32_t size); +uint32_t ecoFindWord(pwordcount_t table,word_t word); + + +void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum); +pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq); +void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq); + +pqueue_t newQueue(pqueue_t queue, uint32_t size); +pqueue_t resizeQueue(pqueue_t queue, uint32_t size); + +void pop(pqueue_t queue); +void push(pqueue_t queue, word_t word, uint32_t count); + +pqueue_t cleanQueue(pqueue_t queue); + +pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize, + uint32_t exampleCount, poptions_t options); +uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers); + +void encodeSequence(ecoseq_t *seq); +ppattern_t buildPatternFromWord(word_t word, uint32_t patlen); + +pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount, + pwordcount_t words,poptions_t options); + +void sortmatch(pprimermatch_t table,uint32_t N); + +/*TR: Added*/ +pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options); + +#endif /* EPSORT_H_ */ diff --git a/src/libecoprimer/ecotype.h b/src/libecoprimer/ecotype.h new file mode 100644 index 0000000..38ce5a8 --- /dev/null +++ b/src/libecoprimer/ecotype.h @@ -0,0 +1,14 @@ +/* + * ecotype.h + * + * Created on: 24 nov. 2008 + * Author: coissac + */ + +#ifndef ECOTYPE_H_ +#define ECOTYPE_H_ + +typedef enum { FALSE=0,TRUE=1} bool_t, *pbool_t; + + +#endif /* ECOTYPE_H_ */ diff --git a/src/libecoprimer/goodtaxon.P b/src/libecoprimer/goodtaxon.P new file mode 100644 index 0000000..a990580 --- /dev/null +++ b/src/libecoprimer/goodtaxon.P @@ -0,0 +1,17 @@ +goodtaxon.o goodtaxon.P : goodtaxon.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h diff --git a/src/libecoprimer/goodtaxon.c b/src/libecoprimer/goodtaxon.c new file mode 100644 index 0000000..f4d7598 --- /dev/null +++ b/src/libecoprimer/goodtaxon.c @@ -0,0 +1,27 @@ +/* + * goodtaxon.c + * + * Created on: 7 nov. 2008 + * Author: coissac + */ + + +#include "ecoprimer.h" + +int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options) +{ + int result; + + result=( (options->r == 0) || (eco_is_taxid_included(taxonomy, + options->restricted_taxid, + options->r, + taxonomy->taxons->taxon[taxon].taxid) + )) && + ((options->g == 0) || !(eco_is_taxid_included(taxonomy, + options->ignored_taxid, + options->g, + taxonomy->taxons->taxon[taxon].taxid) + )); + + return result; +} diff --git a/src/libecoprimer/hashsequence.P b/src/libecoprimer/hashsequence.P new file mode 100644 index 0000000..d4be603 --- /dev/null +++ b/src/libecoprimer/hashsequence.P @@ -0,0 +1,17 @@ +hashsequence.o hashsequence.P : hashsequence.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h diff --git a/src/libecoprimer/hashsequence.c b/src/libecoprimer/hashsequence.c new file mode 100644 index 0000000..8dfe6d4 --- /dev/null +++ b/src/libecoprimer/hashsequence.c @@ -0,0 +1,203 @@ +/* + * hashsequence.c + * + * Created on: 7 nov. 2008 + * Author: coissac + */ + + +#include "ecoprimer.h" + +static int cmpword(const void *x,const void *y); + +static int8_t encoder[] = {0, // A + -1, // b + 1, // C + -1,-1,-1, // d, e, f + 2, // G + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s + 3,3, // T,U + -1,-1,-1,-1,-1}; // v,w,x,y,z + + +uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq) +{ + uint32_t wordcount; + + wordcount = seq->SQ_length; + + if (!circular) wordcount-=wordsize-1; + + return wordcount; +} + +pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size) +{ + uint32_t i=0; + uint32_t j; + char *base; + int8_t code; + int32_t error=0; + word_t word=0; + word_t antiword=0; + uint32_t lmax=0; + + (*size)=0; + + lmax = seq->SQ_length; + if (!circular) + lmax-= wordsize-1; + + if (!dest) + dest = ECOMALLOC(lmax*sizeof(word_t), + "I cannot allocate memory for sequence hashing" + ); + +// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i)); + + for (i=0, base = seq->SQ; i < wordsize && i < lmax; i++,base++) + { + error<<= 1; + + code = encoder[(*base) - 'A']; + if (code <0) + { + code = 0; + error|= 1; + } + + + word=RAPPENDBASE(word,wordsize,code); + if (doublestrand) + antiword=LAPPENDBASE(antiword,wordsize,code); + } + + if (!error && i==wordsize) + { + dest[*size]=(doublestrand) ? MINI(word,antiword):word; + (*size)++; + } + + + for (j=1; j < lmax; j++,i++,base++) + { + +// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j)); + + /* roll over the sequence for circular ones */ + if (i==(uint32_t)seq->SQ_length) base=seq->SQ; + + error<<= 1; + + code = encoder[(*base) - 'A']; + if (code <0) + { + code = 0; + error|= 1; + } + + word=RAPPENDBASE(word,wordsize,code); + if (doublestrand) + antiword=LAPPENDBASE(antiword,wordsize,code); + + if (!error) + { + dest[*size]=(doublestrand) ? MINI(word,antiword):word; + (*size)++; + } + + } + + return dest; + +} + +uint32_t ecoCompactHashSequence(pword_t table,uint32_t size) +{ + uint32_t i,j; + word_t current; + + sortword(table,size); + + current = 0; + current=SETMULTIWORD(current); /* build impossible word for the first loop cycle */ + + for (i=0,j=0; j < size;j++) + { + if (table[j]!=current) + { + current =table[j]; + table[i]=current; + i++; + } + else + table[i]=SETMULTIWORD(table[i]); + } + + return i; +} + +const char* ecoUnhashWord(word_t word,uint32_t size) +{ + static char buffer[32]; + static char decode[]="ACGT"; + + uint32_t i; + + for (i=0; i < size; i++) + { + buffer[i]=decode[(word >> (2 * (size - 1 -i))) & 3]; + } + + buffer[size]=0; + + return buffer; +} + +word_t ecoComplementWord(word_t word,uint32_t size) +{ + word_t rep=0; + uint32_t i; + +// DEBUG_LOG("%llx %llx",word,~word); + word=(~word) & WORDMASK(size); + for (i=0;i < size; i++) + { + + rep = RAPPENDBASE(rep,size,word & 3LLU); +// DEBUG_LOG("%016llx %016llx %016llx",word,word & 3LLU,rep); + word>>=2; + } +// DEBUG_LOG("Complemented = %s",ecoUnhashWord(rep,18)); + return rep; + +} + +static int cmpword(const void *x,const void *y) +{ + word_t w1 = *(pword_t)x; + word_t w2 = *(pword_t)y; + + w1 = WORD(w1); + w2 = WORD(w2); + + if (w1 < w2) + return -1; + if (w1 > w2) + return +1; + + return 0; +} + +uint32_t ecoFindWord(pwordcount_t table,word_t word) +{ + pword_t dest; + + dest = (pword_t)bsearch((const void*)&word,(const void*)table->words,table->size,sizeof(word_t),cmpword); + + if (dest) + return dest - table->words; + else + return ~0; +} + diff --git a/src/libecoprimer/libstki.P b/src/libecoprimer/libstki.P new file mode 100644 index 0000000..3b125ef --- /dev/null +++ b/src/libecoprimer/libstki.P @@ -0,0 +1,17 @@ +libstki.o libstki.P : libstki.c /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/string.h libstki.h ecotype.h ecoprimer.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + ../libecoPCR/ecoPCR.h apat.h debug.h diff --git a/src/libecoprimer/libstki.c b/src/libecoprimer/libstki.c new file mode 100644 index 0000000..9bdebf2 --- /dev/null +++ b/src/libecoprimer/libstki.c @@ -0,0 +1,379 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Mar. 92 */ +/* File: libstki.c */ +/* Purpose: A library to deal with 'stacks' of */ +/* integers */ +/* Note: 'stacks' are dynamic (i.e. size is */ +/* automatically readjusted when needed) */ +/* History: */ +/* 00/03/92 : first draft */ +/* 15/08/93 : revised version */ +/* 14/05/99 : last revision */ +/* ==================================================== */ + +#include +#include +#include + +#include "libstki.h" +#include "ecoprimer.h" + + +/* ============================ */ +/* Constantes et Macros locales */ +/* ============================ */ + +#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1) + +#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1) + + +static int16_t sStkiLastError = kStkiNoErr; + +/* -------------------------------------------- */ +/* gestion des erreurs */ +/* get/reset erreur flag */ +/* */ +/* @function: StkiError */ +/* -------------------------------------------- */ + +int16_t StkiError(bool_t reset) +{ + int16_t err; + + err = sStkiLastError; + + if (reset) + sStkiLastError = kStkiNoErr; + + return err; + +} /* end of StkiError */ + +/* -------------------------------------------- */ +/* creation d'un stack */ +/* */ +/* @function: NewStacki */ +/* -------------------------------------------- */ + +StackiPtr NewStacki(int32_t size) +{ + StackiPtr stki; + + if (! (stki = NEW(Stacki))) + return NULL; + + stki->size = size; + stki->top = 0; + stki->cursor = 0; + + if ( ! (stki->val = NEWN(int32_t, size))) { + sStkiLastError = kStkiMemErr; + return FreeStacki(stki); + } + + return stki; + +} /* end of NewStacki */ + + +/* -------------------------------------------- */ +/* liberation d'un stack */ +/* */ +/* @function: FreeStacki */ +/* -------------------------------------------- */ + +StackiPtr FreeStacki(StackiPtr stki) +{ + if (stki) { + if (stki->val) + ECOFREE(stki->val,"Free stack values"); + ECOFREE(stki,"Free stack"); + } + + return NULL; + +} /* end of FreeStacki */ + +/* -------------------------------------------- */ +/* creation d'un vecteur de stacks */ +/* */ +/* @function: NewStackiVector */ +/* -------------------------------------------- */ + +StackiHdle NewStackiVector(int32_t vectSize, int32_t stackSize) +{ + int32_t i; + StackiHdle stkh; + + if (! (stkh = NEWN(StackiPtr, vectSize))) { + sStkiLastError = kStkiMemErr; + return NULL; + } + + for (i = 0 ; i < vectSize ; i++) + if (! (stkh[i] = NewStacki(stackSize))) + return FreeStackiVector(stkh, i); + + return stkh; + +} /* end of NewStackiVector */ + + +/* -------------------------------------------- */ +/* liberation d'un vecteur de stacks */ +/* */ +/* @function: FreeStackiVector */ +/* -------------------------------------------- */ + +StackiHdle FreeStackiVector(StackiHdle stkh, int32_t vectSize) +{ + int32_t i; + + if (stkh) { + for (i = 0 ; i < vectSize ; i++) + (void) FreeStacki(stkh[i]); + ECOFREE(stkh,"Free stack vector"); + } + + return NULL; + +} /* end of FreeStackiVector */ + +/* -------------------------------------------- */ +/* resize d'un stack */ +/* */ +/* @function: ResizeStacki */ +/* -------------------------------------------- */ + +int32_t ResizeStacki(StackiHdle stkh, int32_t size) +{ + int32_t resize = 0; /* assume error */ + int32_t *val; + + if ((val = ECOREALLOC((*stkh)->val, size * sizeof(int32_t),"Cannot reallocate stack values"))) { + (*stkh)->size = resize = size; + (*stkh)->val = val; + } + + if (! resize) + sStkiLastError = kStkiMemErr; + + return resize; + +} /* end of ResizeStacki */ + +/* -------------------------------------------- */ +/* empilage(/lement) */ +/* */ +/* @function: PushiIn */ +/* -------------------------------------------- */ + +bool_t PushiIn(StackiHdle stkh, int32_t val) +{ + if (((*stkh)->top >= (*stkh)->size) && (! ExpandStack(stkh))) + return FALSE; + + (*stkh)->val[((*stkh)->top)++] = val; + + return TRUE; + +} /* end of PushiIn */ + +/* -------------------------------------------- */ +/* depilage(/lement) */ +/* */ +/* @function: PopiOut */ +/* -------------------------------------------- */ + +bool_t PopiOut(StackiHdle stkh, int32_t *val) +{ + if ((*stkh)->top <= 0) + return FALSE; + + *val = (*stkh)->val[--((*stkh)->top)]; + + if ( ((*stkh)->top < ((*stkh)->size >> 1)) + && ((*stkh)->top > kMinStackiSize)) + + (void) ShrinkStack(stkh); + + return TRUE; + +} /* end of PopiOut */ + +/* -------------------------------------------- */ +/* lecture descendante */ +/* */ +/* @function: ReadiDown */ +/* -------------------------------------------- */ + +bool_t ReadiDown(StackiPtr stki, int32_t *val) +{ + if (stki->cursor <= 0) + return FALSE; + + *val = stki->val[--(stki->cursor)]; + + return TRUE; + +} /* end of ReadiDown */ + +/* -------------------------------------------- */ +/* lecture ascendante */ +/* */ +/* @function: ReadiUp */ +/* -------------------------------------------- */ + +bool_t ReadiUp(StackiPtr stki, int32_t *val) +{ + if (stki->cursor >= stki->top) + return FALSE; + + *val = stki->val[(stki->cursor)++]; + + return TRUE; + +} /* end of ReadiUp */ + +/* -------------------------------------------- */ +/* remontee/descente du curseur */ +/* */ +/* @function: CursiToTop */ +/* @function: CursiToBottom */ +/* -------------------------------------------- */ + +void CursiToTop(StackiPtr stki) +{ + stki->cursor = stki->top; + +} /* end of CursiToTop */ + +void CursiToBottom(stki) + StackiPtr stki; +{ + stki->cursor = 0; + +} /* end of CursiToBottom */ + +/* -------------------------------------------- */ +/* echange des valeurs cursor <-> (top - 1) */ +/* */ +/* @function: CursiSwap */ +/* -------------------------------------------- */ + +void CursiSwap(StackiPtr stki) +{ + int32_t tmp; + + if ((stki->top <= 0) || (stki->cursor < 0)) + return; + + tmp = stki->val[stki->cursor]; + stki->val[stki->cursor] = stki->val[stki->top - 1]; + stki->val[stki->top - 1] = tmp; + +} /* end of CursiSwap */ + +/* -------------------------------------------- */ +/* Recherche d'une valeur en stack a partir du */ +/* curseur courant en descendant. */ +/* on laisse le curseur a l'endroit trouve */ +/* */ +/* @function: SearchDownStacki */ +/* -------------------------------------------- */ + +bool_t SearchDownStacki(StackiPtr stki, int32_t sval) +{ + int32_t val; + bool_t more; + + while ((more = ReadiDown(stki, &val))) + if (val == sval) + break; + + return more; + +} /* end of SearchDownStacki */ + +/* -------------------------------------------- */ +/* Recherche dichotomique d'une valeur en stack */ +/* le stack est suppose trie par valeurs */ +/* croissantes. */ +/* on place le curseur a l'endroit trouve */ +/* */ +/* @function: BinSearchStacki */ +/* -------------------------------------------- */ + +bool_t BinSearchStacki(StackiPtr stki, int32_t sval) +{ + int32_t midd, low, high, span; + + low = 0; + high = stki->top - 1; + + while (high >= low) { + + midd = (high + low) / 2; + + span = stki->val[midd] - sval; + + if (span == 0) { + stki->cursor = midd; + return TRUE; + } + + if (span > 0) + high = midd - 1; + else + low = midd + 1; + } + + return FALSE; + +} /* end of BinSearchStacki */ + +/* -------------------------------------------- */ +/* teste l'egalite *physique* de deux stacks */ +/* */ +/* @function: SameStacki */ +/* -------------------------------------------- */ + +bool_t SameStacki(StackiPtr stki1, StackiPtr stki2) +{ + if (stki1->top != stki2->top) + return FALSE; + + return ((memcmp(stki1->val, stki2->val, + stki1->top * sizeof(int32_t)) == 0) ? TRUE : FALSE); + +} /* end of SameStacki */ + + +/* -------------------------------------------- */ +/* inverse l'ordre des elements dans un stack */ +/* */ +/* @function: ReverseStacki */ +/* -------------------------------------------- */ + +bool_t ReverseStacki(StackiPtr stki) +{ + int32_t *t, *b, swp; + + if (stki->top <= 0) + return FALSE; + + b = stki->val; + t = b + stki->top - 1; + + while (t > b) { + swp = *t; + *t-- = *b; + *b++ = swp; + } + + return TRUE; + +} /* end of ReverseStacki */ + diff --git a/src/libecoprimer/libstki.h b/src/libecoprimer/libstki.h new file mode 100644 index 0000000..cad7d60 --- /dev/null +++ b/src/libecoprimer/libstki.h @@ -0,0 +1,89 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Mar. 92 */ +/* File: libstki.h */ +/* Purpose: library of dynamic stacks holding */ +/* integer values */ +/* History: */ +/* 00/03/92 : first draft */ +/* 07/07/93 : complete revision */ +/* 10/03/94 : added xxxVector funcs */ +/* 14/05/99 : last revision */ +/* ==================================================== */ + +#ifndef _H_libstki +#define _H_libstki + + +#include "ecotype.h" + +/* ==================================================== */ +/* Constantes de dimensionnement */ +/* ==================================================== */ + +#ifndef kMinStackiSize +#define kMinStackiSize 2 /* taille mini stack */ +#endif + + +#define kStkiNoErr 0 /* ok */ +#define kStkiMemErr 1 /* not enough memory */ + +#define kStkiReset TRUE +#define kStkiGet FALSE + +/* ==================================================== */ +/* Macros standards */ +/* ==================================================== */ + +#ifndef NEW +#define NEW(typ) (typ*)malloc(sizeof(typ)) +#define NEWN(typ, dim) (typ*)malloc((uint32_t)(dim) * sizeof(typ)) +#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (uint32_t)(dim) * sizeof(typ)) +#define FREE(ptr) free((Ptr) ptr) +#endif + + +/* ==================================================== */ +/* Types & Structures de donnees */ +/* ==================================================== */ + + /* -------------------- */ + /* structure : pile */ + /* -------------------- */ +typedef struct Stacki { + /* ---------------------*/ + int32_t size; /* stack size */ + int32_t top; /* current free pos. */ + int32_t cursor; /* current cursor */ + int32_t *val; /* values */ + /* ---------------------*/ +} Stacki, *StackiPtr, **StackiHdle; + + + +/* ==================================================== */ +/* Prototypes (generated by mproto) */ +/* ==================================================== */ + + /* libstki.c */ + +int16_t StkiError (bool_t reset ); +StackiPtr NewStacki (int32_t size ); +StackiPtr FreeStacki (StackiPtr stki ); +StackiHdle NewStackiVector (int32_t vectSize, int32_t stackSize ); +StackiHdle FreeStackiVector (StackiHdle stkh, int32_t vectSize ); +int32_t ResizeStacki (StackiHdle stkh , int32_t size ); +bool_t PushiIn (StackiHdle stkh , int32_t val ); +bool_t PopiOut (StackiHdle stkh , int32_t *val ); +bool_t ReadiDown (StackiPtr stki , int32_t *val ); +bool_t ReadiUp (StackiPtr stki , int32_t *val ); +void CursiToTop (StackiPtr stki ); +void CursiToBottom (StackiPtr stki ); +void CursiSwap (StackiPtr stki ); +bool_t SearchDownStacki (StackiPtr stki , int32_t sval ); +bool_t BinSearchStacki (StackiPtr stki , int32_t sval ); +bool_t SameStacki (StackiPtr stki1 , StackiPtr stki2 ); +bool_t ReverseStacki (StackiPtr stki ); + +#endif /* _H_libstki */ diff --git a/src/libecoprimer/mapping.c b/src/libecoprimer/mapping.c new file mode 100644 index 0000000..96c84bd --- /dev/null +++ b/src/libecoprimer/mapping.c @@ -0,0 +1,7 @@ +/* + * mapping.c + * + * Created on: 25 nov. 2008 + * Author: coissac + */ + diff --git a/src/libecoprimer/merge.P b/src/libecoprimer/merge.P new file mode 100644 index 0000000..fdb5796 --- /dev/null +++ b/src/libecoprimer/merge.P @@ -0,0 +1,17 @@ +merge.o merge.P : merge.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h diff --git a/src/libecoprimer/merge.c b/src/libecoprimer/merge.c new file mode 100644 index 0000000..28ec24b --- /dev/null +++ b/src/libecoprimer/merge.c @@ -0,0 +1,144 @@ +/* + * merge.c + * + * Created on: 11 nov. 2008 + * Author: coissac + */ + +#include "ecoprimer.h" + +static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2); + + +static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data,uint32_t s1,uint32_t s2) +{ + merge->words = data->words; + merge->count = data->strictcount; + merge->write = 0; + merge->read1 = 0; + merge->read2 = s1; + merge->size = s1+s2; + return merge; +} + + +typedef enum {S1=1,S2=2,STACK=3} source_t; + +void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum) +{ + merge_t merged; + source_t source; + word_t currentword,tmpword; + uint32_t currentcount,tmpcount; + int same; + queue_t queue; + int nsame=0; + uint32_t maxcount=0; + bool_t writed=TRUE; + +// DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2) + + (void)mergeInit(&merged,data,s1,s2); + (void)newQueue(&queue,MINI(s1,s2)); + + while (merged.read1 < s1 && merged.read2 < merged.size) + { + if (! queue.empty) + { + currentword = queue.words[queue.pop]; + currentcount = queue.count[queue.pop]; + source=STACK; + } + else + { + currentword = merged.words[merged.read1]; + currentcount = merged.count[merged.read1]; + source=S1; + } + + if (WORD(currentword) > WORD(merged.words[merged.read2])) + { + currentword = merged.words[merged.read2]; + currentcount = merged.count[merged.read2]; + source = S2; + } + + same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2])); + nsame+=same; + +// DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same) + + tmpword = merged.words[merged.write]; + tmpcount= merged.count[merged.write]; + + merged.words[merged.write] = currentword; + merged.count[merged.write] = currentcount; + + if (source != S2) + { + if (same) + { + merged.count[merged.write]+=merged.count[merged.read2]; + + if (ISMULTIWORD(currentword) || ISMULTIWORD(merged.words[merged.read2])) + merged.words[merged.write]=SETMULTIWORD(currentword); + + merged.read2++; + } + + if (source==STACK) + pop(&queue); + merged.read1++; + } + else + merged.read2++; + + if (writed && merged.read1 <= merged.write && merged.write < s1) + push(&queue,tmpword,tmpcount); + + if (merged.count[merged.write] > maxcount) + maxcount=merged.count[merged.write]; + + writed = remainingSeq + merged.count[merged.write] >= seqQuorum; + if (writed) + merged.write++; + + +// else +// DEBUG_LOG("Remove word : %s count : %d remainingSeq : %d total : %d Quorum : %d", +// ecoUnhashWord(currentword,18),merged.count[merged.write],remainingSeq,maxcount+remainingSeq,seqQuorum); + + } /* while loop */ + +// DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty) + + + if (merged.read2 < merged.size) + for (;merged.read2 < merged.size;merged.read2++) + { + merged.words[merged.write]=merged.words[merged.read2]; + merged.count[merged.write]=merged.count[merged.read2]; + if (remainingSeq + merged.count[merged.write] >= seqQuorum) + merged.write++; + + } + else while (! queue.empty) + { +// DEBUG_LOG("write : %s count : %d write : %d size : %d pop : %d push : %d empty : %d",ecoUnhashWord(queue.words[queue.pop],18),queue.count[queue.pop],merged.write,queue.size,queue.pop,queue.push,queue.empty) + merged.words[merged.write]=queue.words[queue.pop]; + merged.count[merged.write]=queue.count[queue.pop]; + pop(&queue); + if (remainingSeq + merged.count[merged.write] >= seqQuorum) + merged.write++; + } + + data->size = merged.write; + + cleanQueue(&queue); + +// DEBUG_LOG("Max count : %d remainingSeq : %d total : %d Quorum : %d",maxcount,remainingSeq,maxcount+remainingSeq,seqQuorum) +// DEBUG_LOG("Second word : %s",ecoUnhashWord(data->words[1],18)) +// DEBUG_LOG("Last word : %s",ecoUnhashWord(data->words[data->size-1],18)) + + +} diff --git a/src/libecoprimer/pairs.P b/src/libecoprimer/pairs.P new file mode 100644 index 0000000..61a7976 --- /dev/null +++ b/src/libecoprimer/pairs.P @@ -0,0 +1,17 @@ +pairs.o pairs.P : pairs.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h /usr/include/string.h diff --git a/src/libecoprimer/pairs.c b/src/libecoprimer/pairs.c new file mode 100644 index 0000000..a5308a6 --- /dev/null +++ b/src/libecoprimer/pairs.c @@ -0,0 +1,321 @@ +/* + * pairs.c + * + * Created on: 15 dŽc. 2008 + * Author: coissac + */ + +#include "ecoprimer.h" +#include + +primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options); + +int32_t pairinlist (ppairs_t pairlist, word_t w1, word_t w2, uint32_t size) +{ + uint32_t i; + + for (i = 0; i < size; i++) + { + if (w1 == pairlist[i].w1 && w2 == pairlist[i].w2) return i; + if (w1 == pairlist[i].w2 && w2 == pairlist[i].w1) return i; + } + return -1; +} + +char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid) +{ + uint32_t i; + uint32_t j; + char *ampused = NULL; + + if(pair->ampsetcount == 0) + { + pair->ampsetcount = 500; + pair->ampsetindex = 0; + pair->ampset = ECOMALLOC(pair->ampsetcount * sizeof(ampseqset_t),"Cannot allocate amplifia set"); + } + + for (i = 0; i < pair->ampsetindex; i++) + { + if (strcmp (pair->ampset[i].amplifia, amplifia) == 0) + { + ampused = pair->ampset[i].amplifia; + break; + } + } + + if (i == 0) + { + pair->ampset[i].seqidcount = 100; + pair->ampset[i].seqidindex = 0; + pair->ampset[i].taxonids = ECOMALLOC(pair->ampset[i].seqidcount * sizeof(uint32_t),"Cannot allocate amplifia sequence table"); + } + + if (pair->ampsetindex == pair->ampsetcount) + { + pair->ampsetcount += 500; + pair->ampset = ECOREALLOC(pair->ampset, pair->ampsetcount * sizeof(ampseqset_t), "Cannot allocate amplifia set"); + } + + if (pair->ampset[i].seqidindex == pair->ampset[i].seqidcount) + { + pair->ampset[i].seqidcount += 100; + pair->ampset[i].taxonids = ECOREALLOC(pair->ampset[i].taxonids, pair->ampset[i].seqidcount * sizeof(int32_t), "Cannot allocate amplifia sequence table"); + } + + if (pair->ampset[i].amplifia == NULL) + { + pair->ampset[i].amplifia = amplifia; + pair->ampsetindex++; + } + + for (j = 0; j < pair->ampset[i].seqidindex; j++) + { + if (pair->ampset[i].taxonids[j] == taxid) break; + } + + if (j == pair->ampset[i].seqidindex) + pair->ampset[i].taxonids[pair->ampset[i].seqidindex++] = taxid; + return ampused; +} + +void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia) +{ + uint32_t i; + uint32_t j; + + if(pair->taxsetcount == 0) + { + pair->taxsetcount = 500; + pair->taxsetindex = 0; + pair->taxset = ECOMALLOC(pair->taxsetcount * sizeof(taxampset_t),"Cannot allocate taxon set"); + } + + for (i = 0; i < pair->taxsetindex; i++) + { + if (pair->taxset[i].taxonid == taxid) break; + } + + if (i == 0) + { + pair->taxset[i].amplifiacount = 100; + pair->taxset[i].amplifiaindex = 0; + pair->taxset[i].amplifia = ECOMALLOC(pair->taxset[i].amplifiacount * sizeof(char *),"Cannot allocate amplifia table"); + } + + if (pair->taxsetindex == pair->taxsetcount) + { + pair->taxsetcount += 500; + pair->taxset = ECOREALLOC(pair->taxset, pair->taxsetcount * sizeof(taxampset_t), "Cannot allocate taxon set"); + } + + if (pair->taxset[i].amplifiaindex == pair->taxset[i].amplifiacount) + { + pair->taxset[i].amplifiacount += 100; + pair->taxset[i].amplifia = ECOREALLOC(pair->taxset[i].amplifia, pair->taxset[i].amplifiacount * sizeof(char *), "Cannot allocate amplifia table"); + } + + if (pair->taxset[i].taxonid == 0) + { + pair->taxset[i].taxonid = taxid; + pair->taxsetindex++; + } + + for (j = 0; j < pair->taxset[i].amplifiaindex; j++) + { + if (strcmp(pair->taxset[i].amplifia[j], amplifia) == 0) break; + } + + if (j == pair->taxset[i].amplifiaindex) + { + pair->taxset[i].amplifia[j] = amplifia; + pair->taxset[i].amplifiaindex++; + } +} + +char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len) +{ + char *amplifia = ECOMALLOC((len + 1) * sizeof(char),"Cannot allocate amplifia"); + char *seqc = &seq->SQ[start]; + + strncpy(amplifia, seqc, len); + return amplifia; +} + + +/*TR: Added*/ +pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options) +{ + uint32_t i; + uint32_t j; + uint32_t k; + uint32_t d; + uint32_t strt; + uint32_t end; + uint32_t paircount = 0; + uint32_t pairslots = 500; + int32_t foundindex; + ppairs_t pairs; + pairscount_t primerpairs; + primermatchcount_t seqmatchcount; + word_t w1; + word_t w2; + char *amplifia; + char *oldamp; + + + pairs = ECOMALLOC(pairslots * sizeof(pairs_t),"Cannot allocate pairs table"); + + for (i=0; i < seqdbsize; i++) + { + seqmatchcount = buildPrimerPairsForOneSeq(i, primers, options); + if (seqmatchcount.matchcount == 0) continue; + + for (j=0; j < seqmatchcount.matchcount; j++) + { + strt = 0; + w1 = seqmatchcount.matches[j].word; + /*first word should b on direct strand*/ + if (!seqmatchcount.matches[j].strand) + w1 = ecoComplementWord(w1, options->primer_length); + else + strt = options->primer_length; + + for (k=j+1; k < seqmatchcount.matchcount; k++) + { + end = 0; + w2 = seqmatchcount.matches[k].word; + /*second word should be on reverse strand*/ + if (seqmatchcount.matches[k].strand) + w2 = ecoComplementWord(w2, options->primer_length); + else + end = options->primer_length; + + if (!(seqmatchcount.matches[j].good || seqmatchcount.matches[k].good)) continue; + if (w1 == w2) continue; + + d = seqmatchcount.matches[k].position - seqmatchcount.matches[j].position; + if (d >= options->lmin && d <= options->lmax) + { + /*get amplified string*/ + amplifia = getamplifia (seqdb[i], seqmatchcount.matches[j].position + strt, d - strt - end); + + foundindex = pairinlist(pairs, w1, w2, paircount); + if (foundindex != -1) /*pair is found*/ + { + if (seqdb[i]->isexample) + pairs[foundindex].inexample++; + else + pairs[foundindex].outexample++; + + if (pairs[foundindex].mind > d) pairs[foundindex].mind = d; + else if (pairs[foundindex].maxd < d) pairs[foundindex].maxd = d; + + oldamp = addamplifiasetelem (&pairs[foundindex], amplifia, seqdb[i]->ranktaxonid); + /*if exact same string is already in amplifia set then use that for taxon set, it will help for + * calculating the fully identified taxons i.e specificity, we will compare pointrs instead of strings + * because same string means same pointer*/ + if (oldamp) + { + ECOFREE (amplifia, "free amplifia"); + amplifia = oldamp; + } + addtaxampsetelem (&pairs[foundindex], seqdb[i]->ranktaxonid, amplifia); + + continue; + } + + if (paircount == pairslots) + { + pairslots += 500; + pairs = ECOREALLOC(pairs, pairslots * sizeof(pairs_t), "Cannot allocate pairs table"); + } + pairs[paircount].w1 = w1; + pairs[paircount].w2 = w2; + if (seqdb[i]->isexample) pairs[paircount].inexample = 1; + else pairs[paircount].outexample = 1; + pairs[paircount].mind = d; + pairs[paircount].maxd = d; + oldamp = addamplifiasetelem (&pairs[paircount], amplifia, seqdb[i]->ranktaxonid); + addtaxampsetelem (&pairs[paircount], seqdb[i]->ranktaxonid, amplifia); + + paircount++; + } + else if (d > options->lmax) + break; /*once if the distance is greater than lmax then it will keep on increasing*/ + } + } + ECOFREE(seqmatchcount.matches, "Cannot free matches table"); + } + primerpairs.pairs = ECOREALLOC(pairs, paircount * sizeof(pairs_t), "Cannot allocate pairs table"); + primerpairs.paircount = paircount; + return primerpairs; +} + +primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options) +{ + uint32_t i,j,k; + uint32_t matchcount=0; + pprimermatch_t matches = NULL; + primermatchcount_t seqmatchcount; + + seqmatchcount.matchcount = 0; + seqmatchcount.matches = NULL; + + for (i=0;i < primers->size; i++) + { + matchcount+=primers->primers[i].directCount[seqid]; + matchcount+=primers->primers[i].reverseCount[seqid]; + } + + if (matchcount <= 0) return seqmatchcount; + matches = ECOMALLOC(matchcount * sizeof(primermatch_t),"Cannot allocate primers match table"); + + for (i=0,j=0;i < primers->size; i++) + { + if (primers->primers[i].directCount[seqid]) + { + if (primers->primers[i].directCount[seqid]==1) + { + matches[j].word = primers->primers[i].word; + matches[j].strand=TRUE; + matches[j].good=primers->primers[i].good;/*TR: Added*/ + matches[j].position=primers->primers[i].directPos[seqid].value; + j++; + } + else for (k=0; k < primers->primers[i].directCount[seqid]; k++,j++) + { + matches[j].word = primers->primers[i].word; + matches[j].strand=TRUE; + matches[j].good=primers->primers[i].good;/*TR: Added*/ + matches[j].position=primers->primers[i].directPos[seqid].pointer[k]; + } + } + + if (primers->primers[i].reverseCount[seqid]) + { + if (primers->primers[i].reverseCount[seqid]==1) + { + matches[j].word = primers->primers[i].word; + matches[j].strand=FALSE; + matches[j].good=primers->primers[i].good;/*TR: Added*/ + matches[j].position=primers->primers[i].reversePos[seqid].value; + j++; + } + else for (k=0; k < primers->primers[i].reverseCount[seqid]; k++,j++) + { + matches[j].word = primers->primers[i].word; + matches[j].strand=FALSE; + matches[j].good=primers->primers[i].good;/*TR: Added*/ + matches[j].position=primers->primers[i].reversePos[seqid].pointer[k]; + } + } + } + + sortmatch(matches,matchcount); // sort in asscending order by position + + /*TR: Added*/ + seqmatchcount.matches = matches; + seqmatchcount.matchcount = matchcount; + return seqmatchcount; +} diff --git a/src/libecoprimer/queue.P b/src/libecoprimer/queue.P new file mode 100644 index 0000000..e7a6bc9 --- /dev/null +++ b/src/libecoprimer/queue.P @@ -0,0 +1,17 @@ +queue.o queue.P : queue.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h diff --git a/src/libecoprimer/queue.c b/src/libecoprimer/queue.c new file mode 100644 index 0000000..7d11b25 --- /dev/null +++ b/src/libecoprimer/queue.c @@ -0,0 +1,100 @@ +/* + * queue.c + * + * Created on: 14 nov. 2008 + * Author: coissac + */ + +#include "ecoprimer.h" + + + +pqueue_t newQueue(pqueue_t queue, uint32_t size) +{ + if (!queue) + queue = ECOMALLOC(sizeof(queue_t),"Cannot allocate queue structure"); + + queue->size=0; + + resizeQueue(queue,size); + + return queue; + +} + +pqueue_t resizeQueue(pqueue_t queue, uint32_t size) +{ + queue->pop=0; + queue->push=0; + queue->empty=TRUE; + queue->full=FALSE; + + if (!queue->size) + { + queue->count=ECOMALLOC(size * sizeof(uint32_t), + "Cannot allocate count queue array" + ); + queue->words=ECOMALLOC(size * sizeof(word_t), + "Cannot allocate word queue array" + ); + queue->size=size; + } + else if (size > queue->size) + { + queue->count=ECOREALLOC(queue->count, + size * sizeof(uint32_t), + "Cannot allocate count queue array" + ); + queue->words=ECOREALLOC(queue->words, + size * sizeof(word_t), + "Cannot allocate word queue array" + ); + + queue->size=size; + } + + return queue; +} + +pqueue_t cleanQueue(pqueue_t queue) +{ + if (queue->size) + { + if (queue->count) + ECOFREE(queue->count,"Free count queue"); + if (queue->words) + ECOFREE(queue->words,"Free words queue"); + } + + queue->size=0; + + return queue; +} + +void push(pqueue_t queue, word_t word, uint32_t count) +{ + ECO_ASSERT(!queue->full,"Queue is full"); + + queue->count[queue->push]=count; + queue->words[queue->push]=word; + + queue->push++; + + if (queue->push==queue->size) + queue->push=0; + + queue->full=queue->push==queue->pop; + queue->empty=FALSE; +} + +void pop(pqueue_t queue) +{ + ECO_ASSERT(!queue->empty,"Queue is empty"); + queue->pop++; + + if (queue->pop==queue->size) + queue->pop=0; + + queue->empty=queue->push==queue->pop; + queue->full=FALSE; +} diff --git a/src/libecoprimer/readdnadb.P b/src/libecoprimer/readdnadb.P new file mode 100644 index 0000000..9d16e11 --- /dev/null +++ b/src/libecoprimer/readdnadb.P @@ -0,0 +1,17 @@ +readdnadb.o readdnadb.P : readdnadb.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h diff --git a/src/libecoprimer/readdnadb.c b/src/libecoprimer/readdnadb.c new file mode 100644 index 0000000..a148510 --- /dev/null +++ b/src/libecoprimer/readdnadb.c @@ -0,0 +1,35 @@ +/* + * readdnadb.c + * + * Created on: 7 nov. 2008 + * Author: coissac + */ + +#include "ecoprimer.h" + +pecodnadb_t readdnadb(const char *name, uint32_t *size) +{ + ecoseq_t *seq; + uint32_t buffsize=100; + pecodnadb_t db; + + db = ECOMALLOC(buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory"); + + + for(seq=ecoseq_iterator(name), *size=0; + seq; + seq=ecoseq_iterator(NULL), (*size)++ + ) + { + if (*size==buffsize) + { + buffsize*=2; + db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory"); + } + db[*size]=seq; + }; + + db = ECOREALLOC(db,(*size)*sizeof(ecoseq_t*),"I cannot allocate db memory"); + + return db; +} diff --git a/src/libecoprimer/smothsort.P b/src/libecoprimer/smothsort.P new file mode 100644 index 0000000..2441cad --- /dev/null +++ b/src/libecoprimer/smothsort.P @@ -0,0 +1,10 @@ +smothsort.o smothsort.P : smothsort.c /usr/include/assert.h /usr/include/sys/cdefs.h \ + /usr/include/stdio.h /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/sys/types.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/sys/_structs.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h diff --git a/src/libecoprimer/smothsort.c b/src/libecoprimer/smothsort.c new file mode 100644 index 0000000..72ee444 --- /dev/null +++ b/src/libecoprimer/smothsort.c @@ -0,0 +1,265 @@ +/* + * This file is part of the Sofia-SIP package + * + * Copyright (C) 2005 Nokia Corporation. + * + * Contact: Pekka Pessi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +/**@file smoothsort.c + * @brief Smoothsort implementation + * + * Smoothsort is a in-place sorting algorithm with performance of O(NlogN) + * in worst case and O(n) in best case. + * + * @sa + * "Smoothsort, an alternative for sorting in-situ", E.D. Dijkstra, EWD796a, + * <http://www.enterag.ch/hartwig/order/smoothsort.pdf>. + * + * @author Pekka Pessi + */ + + +#include +#include +#include +#include /* add sto switch from size_t to uint32_t */ + +/** Description of current stretch */ +typedef struct { + uint32_t b, c; /**< Leonardo numbers */ + unsigned long long p; /**< Concatenation codification */ +} stretch; + +/** Description of array */ +typedef struct +{ + void *m; + int (*less)(void *m, uint32_t a, uint32_t b); + void (*swap)(void *m, uint32_t a, uint32_t b); +} array; + +static inline uint32_t stretch_up(stretch s[1]) +{ + uint32_t next; + + s->p >>= 1; + + next = s->b + s->c + 1, s->c = s->b, s->b = next; + + return next; +} + +static inline uint32_t stretch_down(stretch s[1], unsigned bit) +{ + uint32_t next; + + s->p <<= 1, s->p |= bit; + + next = s->c, s->c = s->b - s->c - 1, s->b = next; + + return next; +} + +#if DEBUG_SMOOTHSORT +static char const *binary(unsigned long long p) +{ + static char binary[65]; + int i; + + if (p == 0) + return "0"; + + binary[64] = 0; + + for (i = 64; p; p >>= 1) + binary[--i] = "01"[p & 1]; + + return binary + i; +} +#else +#define DEBUG(x) ((void)0) +#endif + +/** + * Sift the root of the stretch. + * + * The low values are sifted up (towards index 0) from root. + * + * @param array description of array to sort + * @param r root of the stretch + * @param s description of current stretch + */ +static void sift(array const *array, uint32_t r, stretch s) +{ + while (s.b >= 3) { + uint32_t r2 = r - s.b + s.c; + + if (!array->less(array->m, r - 1, r2)) { + r2 = r - 1; + stretch_down(&s, 0); + } + + if (array->less(array->m, r2, r)) + break; + + DEBUG(("\tswap(%p @%zu <=> @%zu)\n", array, r, r2)); + + array->swap(array->m, r, r2); r = r2; + + stretch_down(&s, 0); + } +} + +/** Trinkle the roots of the given stretches + * + * @param array description of array to sort + * @param r root of the stretch + * @param s description of stretches to concatenate + */ +static void trinkle(array const *array, uint32_t r, stretch s) +{ + DEBUG(("trinkle(%p, %zu, (%u, %s))\n", array, r, s.b, binary(s.p))); + + while (s.p != 0) { + uint32_t r2, r3; + + while ((s.p & 1) == 0) + stretch_up(&s); + + if (s.p == 1) + break; + + r3 = r - s.b; + + if (array->less(array->m, r3, r)) + break; + + s.p--; + + if (s.b < 3) { + DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r3, s.b)); + array->swap(array->m, r, r3); r = r3; + continue; + } + + r2 = r - s.b + s.c; + + if (array->less(array->m, r2, r - 1)) { + r2 = r - 1; + stretch_down(&s, 0); + } + + if (array->less(array->m, r2, r3)) { + DEBUG(("swap(%p [%zu]=[%zu])\n", array, r, r3)); + array->swap(array->m, r, r3); r = r3; + continue; + } + + DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r2, s.b)); + array->swap(array->m, r, r2); r = r2; + stretch_down(&s, 0); + break; + } + + sift(array, r, s); +} + +/** Trinkles the stretches when the adjacent stretches are already trusty. + * + * @param array description of array to sort + * @param r root of the stretch + * @param stretch description of stretches to trinkle + */ +static void semitrinkle(array const *array, uint32_t r, stretch s) +{ + uint32_t r1 = r - s.c; + + DEBUG(("semitrinkle(%p, %zu, (%u, %s))\n", array, r, s.b, binary(s.p))); + + if (array->less(array->m, r, r1)) { + DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r1, s.b)); + array->swap(array->m, r, r1); + trinkle(array, r1, s); + } +} + +/** Sort array using smoothsort. + * + * Sort @a N elements from array @a base starting with index @a r with smoothsort. + * + * @param base pointer to array + * @param r lowest index to sort + * @param N number of elements to sort + * @param less comparison function returning nonzero if m[a] < m[b] + * @param swap swapper function exchanging elements m[a] and m[b] + */ +void su_smoothsort(void *base, uint32_t r, uint32_t N, + int (*less)(void *m, uint32_t a, uint32_t b), + void (*swap)(void *m, uint32_t a, uint32_t b)) +{ + stretch s = { 1, 1, 1 }; + uint32_t q; + + array const array[1] = {{ base, less, swap }}; + + assert(less && swap); + + if (base == NULL || N <= 1 || less == NULL || swap == NULL) + return; + + DEBUG(("\nsmoothsort(%p, %zu)\n", array, nmemb)); + + for (q = 1; q != N; q++, r++, s.p++) { + DEBUG(("loop0 q=%zu, b=%u, p=%s \n", q, s.b, binary(s.p))); + + if ((s.p & 7) == 3) { + sift(array, r, s), stretch_up(&s), stretch_up(&s); + } + else /* if ((s.p & 3) == 1) */ { assert((s.p & 3) == 1); + if (q + s.c < N) + sift(array, r, s); + else + trinkle(array, r, s); + + while (stretch_down(&s, 0) > 1) + ; + } + } + + trinkle(array, r, s); + + for (; q > 1; q--) { + s.p--; + + DEBUG(("loop1 q=%zu: b=%u p=%s\n", q, s.b, binary(s.p))); + + if (s.b <= 1) { + while ((s.p & 1) == 0) + stretch_up(&s); + --r; + } + else /* if b >= 3 */ { + if (s.p) semitrinkle(array, r - (s.b - s.c), s); + stretch_down(&s, 1); + semitrinkle(array, --r, s); + stretch_down(&s, 1); + } + } +} diff --git a/src/libecoprimer/sortmatch.P b/src/libecoprimer/sortmatch.P new file mode 100644 index 0000000..721cd46 --- /dev/null +++ b/src/libecoprimer/sortmatch.P @@ -0,0 +1,17 @@ +sortmatch.o sortmatch.P : sortmatch.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h /usr/include/math.h /usr/include/architecture/i386/math.h diff --git a/src/libecoprimer/sortmatch.c b/src/libecoprimer/sortmatch.c new file mode 100644 index 0000000..f3771b7 --- /dev/null +++ b/src/libecoprimer/sortmatch.c @@ -0,0 +1,51 @@ +/* + * sortmatch.c + * + * Created on: 15 dŽc. 2008 + * Author: coissac + */ + +/* + * sortword.c + * + * + * Created on: 6 nov. 2008 + * Author: coissac + */ + +#include "ecoprimer.h" +#include + +void su_smoothsort(void *base, uint32_t r, uint32_t N, + int (*less)(void *m, uint32_t a, uint32_t b), + void (*swap)(void *m, uint32_t a, uint32_t b)); + +static int less(void *m, uint32_t a, uint32_t b); +static void swap(void *m, uint32_t a, uint32_t b); + + +void sortmatch(pprimermatch_t table,uint32_t N) +{ + su_smoothsort((void*)table,0,N,less,swap); +} + +int less(void *m, uint32_t a, uint32_t b) +{ + pprimermatch_t t; + + t = (pprimermatch_t)m; + + return t[a].position <= t[b].position; +} + +void swap(void *m, uint32_t a, uint32_t b) +{ + primermatch_t tmp; + pprimermatch_t t; + + t = (pprimermatch_t)m; + tmp = t[a]; + t[a]= t[b]; + t[b]= tmp; +} + diff --git a/src/libecoprimer/sortword.P b/src/libecoprimer/sortword.P new file mode 100644 index 0000000..57df213 --- /dev/null +++ b/src/libecoprimer/sortword.P @@ -0,0 +1,17 @@ +sortword.o sortword.P : sortword.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h /usr/include/math.h /usr/include/architecture/i386/math.h diff --git a/src/libecoprimer/sortword.c b/src/libecoprimer/sortword.c new file mode 100644 index 0000000..389630f --- /dev/null +++ b/src/libecoprimer/sortword.c @@ -0,0 +1,44 @@ +/* + * sortword.c + * + * + * Created on: 6 nov. 2008 + * Author: coissac + */ + +#include "ecoprimer.h" +#include + +void su_smoothsort(void *base, uint32_t r, uint32_t N, + int (*less)(void *m, uint32_t a, uint32_t b), + void (*swap)(void *m, uint32_t a, uint32_t b)); + +static int less(void *m, uint32_t a, uint32_t b); +static void swap(void *m, uint32_t a, uint32_t b); + + +void sortword(pword_t table,uint32_t N) +{ + su_smoothsort((void*)table,0,N,less,swap); +} + +int less(void *m, uint32_t a, uint32_t b) +{ + pword_t t; + + t = (pword_t)m; + + return WORD(t[a]) <= WORD(t[b]); +} + +void swap(void *m, uint32_t a, uint32_t b) +{ + word_t tmp; + pword_t t; + + t = (pword_t)m; + tmp = t[a]; + t[a]= t[b]; + t[b]= tmp; +} + diff --git a/src/libecoprimer/strictprimers.P b/src/libecoprimer/strictprimers.P new file mode 100644 index 0000000..cd887aa --- /dev/null +++ b/src/libecoprimer/strictprimers.P @@ -0,0 +1,18 @@ +strictprimers.o strictprimers.P : strictprimers.c ecoprimer.h /usr/include/inttypes.h \ + /usr/include/sys/cdefs.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \ + debug.h /usr/include/string.h /usr/include/math.h \ + /usr/include/architecture/i386/math.h diff --git a/src/libecoprimer/strictprimers.c b/src/libecoprimer/strictprimers.c new file mode 100644 index 0000000..7cca4e8 --- /dev/null +++ b/src/libecoprimer/strictprimers.c @@ -0,0 +1,170 @@ +/* + * strictprimers.c + * + * Created on: 7 nov. 2008 + * Author: coissac + */ + +#include "ecoprimer.h" +#include +#include + +pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq) +{ + uint32_t i; + uint32_t buffsize; + //wordcount_t t; + + if (!table) + table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure"); + + table->words=NULL; + table->size =0; + table->outseqcount=0; + table->inseqcount=0; + table->strictcount =0; + + if (seq) + { + table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize); + table->size = ecoCompactHashSequence(table->words,buffsize); + + table->inseqcount=1; + table->strictcount =ECOMALLOC((table->size*sizeof(uint32_t)), + "Cannot allocate memory for word count table" + ); + + for (i=0; i < table->size; i++) table->strictcount[i]=1; + } + + return table; +} + +void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq) +{ + uint32_t buffersize; + pword_t newtable; + uint32_t newsize; + uint32_t i; + + buffersize = table->size + ecoWordCount(wordsize,circular,seq); + + table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t), + "Cannot allocate memory to extend word table"); + + + newtable = table->words + table->size; + +// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable); + + (void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize); +// DEBUG_LOG("new seq wordCount : %d",newsize); + + newsize = ecoCompactHashSequence(newtable,newsize); + +// DEBUG_LOG("compacted wordCount : %d",newsize); + buffersize = table->size + newsize; + + // resize the count buffer + + table->inseqcount++; + + + table->strictcount = ECOREALLOC(table->strictcount,buffersize*sizeof(uint32_t), + "Cannot allocate memory to extend example word count table"); + + for (i=table->size; i < buffersize; i++) + table->strictcount[i]=1; + + + + // Now we have to merge in situ the two tables + + ecomerge(table,table->size,newsize,exampleCount - table->inseqcount,seqQuorum); +// DEBUG_LOG("Dictionnary size : %d",table->size); + +} + +pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize, + uint32_t exampleCount,poptions_t options) +{ + uint32_t i; + pwordcount_t strictprimers=NULL; + uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum); + + fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",sequenceQuorum,exampleCount); + + strictprimers = initCountTable(NULL,options->primer_length, + options->circular, + options->doublestrand, + NULL); + + + for (i=0;iisexample) + { + if (strictprimers->size) + { + uint32_t s; + s = strictprimers->size; +// DEBUG_LOG("stack size : %u",s); + addSeqToWordCountTable(strictprimers,options->primer_length, + options->circular, + options->doublestrand, + exampleCount, + sequenceQuorum, + database[i]); + } + else + strictprimers = initCountTable(strictprimers,options->primer_length, + options->circular, + options->doublestrand, + database[i]); + + } + else + strictprimers->outseqcount++; + + fprintf(stderr," Indexed sequences %5d/%5d : considered words %-10d \r",(int32_t)i+1,(int32_t)seqdbsize,strictprimers->size); + +// DEBUG_LOG("First word : %s ==> %d",ecoUnhashWord(strictprimers->words[0],18),strictprimers->incount[0]) +// DEBUG_LOG("Second word : %s ==> %d",ecoUnhashWord(strictprimers->words[1],18),strictprimers->incount[1]) + } + + strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount, + sizeof(uint32_t)*strictprimers->size, + "Cannot reallocate strict primer count table"); + strictprimers->words = ECOREALLOC(strictprimers->words, + sizeof(word_t)*strictprimers->size, + "Cannot reallocate strict primer table"); + + return strictprimers; +} + +uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers) +{ + uint32_t i; + uint32_t w; + + for (i=0,w=0;i < strictprimers->size;i++) + { + if (w < i) + { + strictprimers->words[w]=strictprimers->words[i]; + strictprimers->strictcount[w]=strictprimers->strictcount[i]; + } + if (! ISMULTIWORD(strictprimers->words[w])) + w++; + } + + strictprimers->size=w; + strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount, + sizeof(uint32_t)*strictprimers->size, + "Cannot reallocate strict primer count table"); + strictprimers->words = ECOREALLOC(strictprimers->words, + sizeof(word_t)*strictprimers->size, + "Cannot reallocate strict primer table"); + + return w; +}