From 6f5f2a16f31014b2c8b80d4d0e1724d422b95929 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Mon, 12 Oct 2009 13:31:41 +0000 Subject: [PATCH] Change 1 : patch the help message to take into account the added options and the new output format. Change 2 : Change the rules used to define the example and counterexample taxon sets. By default all taxa are example taxa and no counterexample taxa are used. By using the -r option (one or several times) you can restrict the example taxa to a subset of taxa. In the old version, all taxa not in the example set were in the counterexample set. Now restricting the example set with the -r option doesn't define the counterexample set. You must use -i to define the counterexample set, in the same way as the -r option does for example taxa. git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@234 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- .pydevproject | 4 +-- src/ecoprimer.c | 51 ++++++++++++++++++++++------------- src/libecoprimer/ecoprimer.h | 4 ++- src/libecoprimer/goodtaxon.c | 28 +++++++++++++++++++ src/libecoprimer/readdnadb.c | 27 ++++++++++++------- src/libthermo/libthermo.a | Bin 12896 -> 0 bytes 6 files changed, 83 insertions(+), 31 deletions(-) delete mode 100644 src/libthermo/libthermo.a diff --git a/.pydevproject b/.pydevproject index 6725cde..ac30bc7 100644 --- a/.pydevproject +++ b/.pydevproject @@ -1,7 +1,7 @@ - + -Default +Python 2.6 python 2.6 diff --git a/src/ecoprimer.c b/src/ecoprimer.c index 2342318..c55dda3 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -53,37 +53,50 @@ static void PrintHelp() PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n"); PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n"); PP "-h : [H]elp - print help\n\n"); - PP "-i : [I]gnore the given taxonomy id.\n\n"); + PP "-i : [I]gnore the given taxonomy id (define the counterexample taxon set).\n\n"); PP "-l : minimum [L]ength : define the minimum amplication length. 
\n\n"); PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n"); - PP "-r : [R]estricts the search to the given taxonomic id.\n\n"); + PP "-r : [R]estricts the search to the given taxonomic id (restrict the example taxon set).\n\n"); PP "-c : Consider that the database sequences are [c]ircular\n\n"); -// PP "-3 : Three prime strict match\n\n"); + PP "-3 : Three prime strict match\n\n"); PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n"); PP "-s : [S]ensitivity quorum\n\n"); PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n"); PP "-x : false positive quorum\n\n"); PP "-D : set in [d]ouble strand mode\n\n"); + PP "-O : set the primer length (default 18) \n\n"); PP "-S : Set in [s]ingle strand mode\n\n"); + PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1 or OWCZARZY:2, default=1)\n\n"); + PP "-a : Salt contentration in M for Tm computation (default 0.05 M)\n\n"); PP "-U : No multi match\n\n"); + PP "-U : Define the [R]eference sequence identifier (must be part of example set)\n\n"); + PP "-A : Print the list of all identifier of sequences present in the database\n\n"); + PP "-f : Remove data mining step during strict primer identification\n\n"); + PP "-v : Store statistic file about memory usage during strict primer identification\n\n"); PP "\n"); PP "------------------------------------------\n"); PP "Table result description : \n"); - PP "column 1 : serial number\n"); - PP "column 2 : primer1\n"); - PP "column 3 : primer2\n"); - PP "column 4 : good/bad\n"); - PP "column 5 : in sequence count\n"); - PP "column 6 : out sequence count\n"); - PP "column 7 : yule\n"); - PP "column 8 : in taxa count\n"); - PP "column 9 : out taxa count\n"); - PP "column 10 : coverage\n"); - PP "column 11 : unambiguously identified taxa\n"); - PP "column 12 : specificity\n"); - PP "column 13 : minimum amplified length\n"); - PP 
"column 14 : maximum amplified length\n"); - PP "column 15 : average amplified length\n"); + PP "column 1 : serial number\n"); + PP "column 2 : primer1\n"); + PP "column 3 : primer2\n"); + PP "column 4 : primer1 Tm without mismatch\n"); + PP "column 5 : primer1 lowest Tm against exemple sequences\n"); + PP "column 6 : primer2 Tm without mismatch\n"); + PP "column 7 : primer2 lowest Tm against exemple sequences\n"); + PP "column 8 : primer1 G+C count\n"); + PP "column 9 : primer2 G+C count\n"); + PP "column 10 : good/bad\n"); + PP "column 11 : amplified example sequence count\n"); + PP "column 12 : amplified counterexample sequence count\n"); + PP "column 13 : yule\n"); + PP "column 14 : amplified example taxa count\n"); + PP "column 15 : amplified counterexample taxa count\n"); + PP "column 16 : ratio of amplified example taxa versus all example taxa (Bc index)\n"); + PP "column 17 : unambiguously identified example taxa count\n"); + PP "column 18 : ratio of specificity unambiguously identified example taxa versus all example taxa (Bs index)\n"); + PP "column 19 : minimum amplified length\n"); + PP "column 20 : maximum amplified length\n"); + PP "column 21 : average amplified length\n"); PP "------------------------------------------\n"); PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n"); PP "------------------------------------------\n\n"); @@ -690,7 +703,7 @@ int main(int argc, char **argv) fprintf(stderr,"Reading sequence database ...\n"); - seqdb = readdnadb(options.prefix,&seqdbsize); + seqdb = readdnadb(options.prefix,taxonomy,&seqdbsize, &options); if (options.printAC) { diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h index a25c8ad..195ae57 100644 --- a/src/libecoprimer/ecoprimer.h +++ b/src/libecoprimer/ecoprimer.h @@ -290,9 +290,11 @@ typedef ecoseq_t **pecodnadb_t; void sortword(pword_t table,uint32_t N); -pecodnadb_t readdnadb(const char *name, uint32_t *size); +pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, 
uint32_t *size,poptions_t options); int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options); +int isExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options); +int isCounterExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options); uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq); pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size,int32_t *neededWords,uint32_t neededWordCount, diff --git a/src/libecoprimer/goodtaxon.c b/src/libecoprimer/goodtaxon.c index f4d7598..09b47ce 100644 --- a/src/libecoprimer/goodtaxon.c +++ b/src/libecoprimer/goodtaxon.c @@ -25,3 +25,31 @@ int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options) return result; } + +int isExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options) +{ + int result; + + result=( (options->r == 0) || (eco_is_taxid_included(taxonomy, + options->restricted_taxid, + options->r, + taxonomy->taxons->taxon[taxon].taxid) + )); + + return result; +} + + +int isCounterExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options) +{ + int result; + + result=((options->g != 0) && (eco_is_taxid_included(taxonomy, + options->ignored_taxid, + options->g, + taxonomy->taxons->taxon[taxon].taxid) + )); + + + return result; +} diff --git a/src/libecoprimer/readdnadb.c b/src/libecoprimer/readdnadb.c index 98867dd..ced45c5 100644 --- a/src/libecoprimer/readdnadb.c +++ b/src/libecoprimer/readdnadb.c @@ -7,7 +7,7 @@ #include "ecoprimer.h" -pecodnadb_t readdnadb(const char *name, uint32_t *size) +pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, uint32_t *size,poptions_t options) { ecoseq_t *seq; uint32_t buffsize=100; @@ -18,15 +18,24 @@ pecodnadb_t readdnadb(const char *name, uint32_t *size) for(seq=ecoseq_iterator(name), *size=0; seq; - seq=ecoseq_iterator(NULL), (*size)++ + seq=ecoseq_iterator(NULL) ) { - if (*size==buffsize) - { - 
buffsize*=2; - db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory"); - } - db[*size]=seq; + if (isExampleTaxon(taxonomy,seq->taxid,options) || + isCounterExampleTaxon(taxonomy,seq->taxid,options)) + { + if (*size==buffsize) + { + buffsize*=2; + db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory"); + } + db[*size]=seq; + (*size)++; + } + else + { + delete_ecoseq(seq); + } }; db = ECOREALLOC(db,(*size)*sizeof(ecoseq_t*),"I cannot allocate db memory"); @@ -47,4 +56,4 @@ void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize) fprintf (stderr, "seq %d = %s\n", i, ch); } exit (0); -} \ No newline at end of file +} diff --git a/src/libthermo/libthermo.a b/src/libthermo/libthermo.a deleted file mode 100644 index be29e706eab94a50cb39fb02a8820752b57f58b9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12896 zcmd^Ge^gZ0oxjMS)FO{;F(q4DCkaW1E*c0JF=698nb9{17!sjkO8F6-5w{3bhn9BD zc5s*sufqhkaSOVf15MLv+jM2OB;b!Q4-^IxObN9oBrz>b>{gqkYs6S$Q)fTld*AR& z(|FSUvFGePy!qbyz2E!&b-(xCckg}NoLRcFto+`^x2E24XNqwqrQVso`mU5Ut5@A= zG_FcXMTMwNO-Z>cJ>6(byAwY=l`JkUUb$&Yj>Wp(xaon1@~jqyluJKI4H1lKfD`|~ z*e!Z_l@5uJ>hpD2p+lDrpU~k?bvO=KTvl39R#Q>xwCC6>oZBn6<=J;s+jo{aYj)b# zmG0VJ#)>PeOLvy;D9!@mCVPb=FJ}&FcCo@F1ZAj1eQLSL5kCX6N+M&o6Ux9`1j4Df zcx$P%G+ZM77CBFzDI0Y@f3Krl(3ALmSSl{A*zQDpRHO@v;^MjbW$OHlI)gUw(@o|W z^4sh*%`Zb>=kqJk`PJ%;H1M0vW4^A%#rE=I2SUSsZGPiAzc^i~j9Uc1+S?d|3)v?b zE7wJFKx7GcTH##kW0MJEFM`uek4d7T=kM^u;|cdAve%s^rML~z`IAy%)JkZmsJ|UP z?vR#S7?YN}h>0TgUcxhsXH4;@e@*en*c5;KSiVi_)quF9L=2-9a_<8DT_vI)94#%z zFq?cd9F)~z#cwdn>IJkq4{|vVWo?t9{z-iq1TW$FIhsEN?!$`v?KmX@!VLW=Hu$pQ zv;B#w?x~2$by8g8_e)@@^CtJy0;!%9RNN=xBvap)Ta-DRk+B83ZRr=Xw!8xPCUhO)GFL89(Ij8mJO)4R`lPh^HOce~Y32CWqlP6@ z|8dFm((t5IG&(62ze*KA&FRnlR82CCQx|NusUi7`az2KodT|rHo*}Iql3KKoHYrOt zEJ6WhJz&7k<`~HnjUbc0y#T+H@Ze&oK*r^$f6h^_aLdWDtASR0_#qo%I{0je27^fC z9^Mc$P4{V#*!kP$9uG{U4w7L5Y!S$R@aSs~QJycEn>ZZc{h)e+) z6ya=Ebko(b*C`HXj?^!j;lXZnO-Cz{Nz0{X-(93k;vg)N?BKh7{R0$Kj zW-c7K@@W&i$} 
zsG9dfeeU<8YQ~gMP;rM0H9rdty01jl{Mh_#gi&Q+@!MahX<#7Bkpw#sdGH50O7Iay{c(-N^dg z6$_z!Z2!Jzs8I0<#(MVe`_I0B!|~%jxre#l8L+BNF+_7DE$wHZ=^n=lD<5U-Hv>o6 zsA15m9*ACs$%nDa1BcVn+KJ?8Oj+QTi$$`R85wArx?(sg@m9o~dY}_PKD>Y5_hk7P zWLP0{yKGgxE_A=F1~mA?szLwFC9pJS|9lhu~IXu>t^g?vbj zgJXd-LDCV{@dGPr%8i`SMS#Q1^`sSi3Sx;W(0U8N(X_ND%xHOrd-zOhonq&oH0$Q+++Xj&}!tnk3Ug-nlT^h=5 z)YrzFsOmJFk(PFxoC-GZNHdItlPqMU>P-jNo|7XLW~kW1Yg!ECb+!8-Cuq6IOMzC{ ze*`YGpbZmtHv*8CP5^Ztd2rYbJ$fKbg9*7X4&s)P%a5KM$-Wo;&+f3GhX*w{xt6=U zeHo;{?D5Z#%odHjtTrWb|F(1jfHD)ofo?94Q4`I5-F^y2Ar!ue4)Qj`qwvyc@+D-a zt%cAyC=GRT$%JEwQmEK%qKIS?8Xxa_fKgc{eZ`YHY9~im&4-{~R z9ctyJ?rPov-cIhH?z5s~;`|OYAk3`lp&{y==io>hZSirWZ#yG^f%zk7;Wd+7wY!{m zR^T8Hkfzg}PV=s7is2p9R1W~{w}Laes14FEcWuPT zZtnm&m`#xOM=`Wvq&FFn1^-pR6zFDxk&x}A-Vb{B@)+ac76URp@=l=J^lTiM>)Es9 zLZb`HwsdhO-WVhCYH0<4X+!;R$_L0}+Ij^(Y)av=(_O$pO9LFxdvatMeBvGA4rq#p z12EmavKCBg`OM<&fg8Z9lp?tTz1!n(Jf^`T8!@6W z_g~PP0l94AJ2Xw6psaZnq%|!=4?*ZLc>l1&5i65B`NY$0fyLw%ba9{&zJnhdwE*kr zwZcFDt)V{^Qc(?biUD_C%vYHrYIgGh?`;+!S_Hse04!uj$;?&}Rfl_Dwbb_%sh3c$0Rc8F#>6URC< zWceoFyozYe9Ls~%LR)`xBsskv*!}yyp}~2Dzyi|-%*m0F=U};0V7h?8G6tQ!g`WbA z>a9lv!wp6VAsXG@aq4z(o<)CosN_F!MIJp*@ku&Vj8IwLxzqseti<0Xzcs zz>E^6q=%G>0~5yT`;a}$>24vc30N#q+$;!#j|gi97Nd=56A0^i5e5Ey}@Ok#bE6S#@>2f)Hx8Zf9}7gg##1-K2@vT&4N{dn7UeNvOqj~VD!=P6ww5| zXMoX1M>QYu-JK8v&!vm`%!v{oD?#rEz%Vn9nM{&{iJvec=nVqXI5XHsFdgVI)6B8l z*`pi_XXE7_SoHcSU|@19FL}pBsRxq?956+*PtaS)rxd=#w*dovFgYHwU>?H6JIbe{ z?s`l|Abp!i#blBUj6Rvffveu}yFreHfaaBBoWP;cTtU(?SAY(QqCbXo7BfvYu(SZ; zU@8NLU-FmbBDr|@cxcEOFPV|q0BUr4k4*5cV8$TP; z_sjKszr4}kc-4M6Tm9XAbh;GYFDvS=a*}>)OB&Bf`i)I}!KRKWN#j{TIw~>Wj+w`h zu-sGDXLFx0AisuiLBm2qDYQqH6F6oaC&I3`nH&>z)2>%E-$PkWUN=aUbhJb}WLw>F zsEekacR3i?8QI0#wuV$BU(mY zP5oe#`ip5np4Vhmvq;wCshZWJNsr&U0Nz)BmpYt!i61EI#|XWP+*3};docp$_?OZM zRyC!}um=Y#p$SKG1X?Y_vl`DS&`kV4e8>l6H~C?<{p6F`wk~|mwjImlaxNgv3qSN(gKEYse!F(}@@I#9=L%fkJc?LoI*4WimS(4`` zRMmZT)dtDaOjT{+K%a@aDEW;%zTJEm$ePB%p3434g5eu9-A$LQ8ZKB7IZNtSU8>er;lKZl6O zR{wx8hOy7br_{UfFOIJeCXCNuVqA^q_TKeHU;#1sn?qNwT>0bZ&HZ(6xR9J@ 
z)zQAQaewT0A%G;ysN$CwD3WD}jVbO|B9q>AzaOFIG<^MbY4bbM=3!}NP9xTN$$L9O z4`E$9>%D;P->kmfiSZy6C)J4Qw^HYxAG<@ixw{dKvC!l+;yBqB8nE0`(}4H}=lV?a zi!Ak8u{hA$zykD=+e<(0v$E1EJsTfQ)e$IJq~7Pvrk9mgv(y`so;5@(Ww=Dd^<=~_ zsz%MGL3;VX#gMIR3?!HE8ZefPxrM)n82WG(`8>aG^v5?N&ndGba1L@Fw~?1{V^vVG z*NE)P;TIQNO3Lm~1d9A(Qt>ZK1yx*Q8(mmPo?GY!#`V6qO&k-qi8tUn(jYzlD%97< zr!4Lp!}pU?{d+jWraMZh=`G~wqRf&QC)|P?wqbG&%_s(VEPVIKJtzF-8AT)EVBO6t zSJc6USnZ`fIjEc0MKn(amCzuT_ZKKEFtqJ7@}o<9VXZRpt@(TQHR;}J5;wH zb-K=^Eou5N#D_IKw*+h@(*SNHt&MY4!3E`zxI}fqJ8(}u0Y)CRB+`wlzZz`;e_8^_ zCq7JxIuiG*a8!1pxM4-Mu#Mlaf`XGnk0s=Ez@@+EuyDzkD2L%SZ&=T5gFT>|Nf8W= zXP7W%5@rdJPL3Qi)BO}mYw-l|{tf6_I}5KSK1!1L-SRgZo|!sx z@8RYrlfL`xoA-jyr9B^7`|#F#e*J*|<+V7cK}s=m1}=+ewm z%;uj9nLHUN!};TG5cCGxak>qSx@+glN#l+1W1uZ2Z$~Ok%dt=up0FLShYgvLLk%H^ z`mZU{K)cVzh5o3=4V?vJEPZj=1y76F* z1_QNmBR}8HI^cP_s=bgm-_9q9AEm44$vtEqY~?Z+3Yof{7jOaq9hUxHhi~ZcRUIA# zypgdL59#G!V2cjJmmUP9cC_h7`s^WeJI+K_`~Z;Z9|5HLFY0ik4wvfne+DtpuLpC2 z@1ggp-Rpo<-h@D<@>(7K5N^YD?usXLSOZA>cL1WviabD&{$q}gx9a%20Ey4f;5LHv zJsP!h>9AIZ)jGT%knBy=%NJoB(PQt*+m8YdHx>~DK9;^Yz+WLj;A1)fA*YBSM8~!P z;;)b(@G&KT2)QDH2*lVzK>QUF1U@DO5PRn$f{+=T42Zu%g23}{B@og@H1Xl&EcOf_ z)e{6hrUwvRTSO2&AKRtZ69hiS1&9rQ5kU;l*xi8Co*?isxqw(;iwMS`K3lIR2z-nY z5C@S(1mjRo-B0Za0w41{3b8Oe{2kXXsSL-^IW(mscqHJ?qmwB$Aupl6`eso+4M6oS z#0jM?y}m=QSAe7XPCS&>==BXoUSvN4p!yVaBBg)U>r-wPV z(>|&LL`bes_!xK zBC7%bY5`)4r+*Sz?{}sDK8qiXi5~p$!%qWq2~Og7`n%2aaCz=`plv#z@VF{OMn~}mVZFjUKkB>9 z4!v=K-k=2hh=&PdnPgzDE)Vk}NaQr-DX1qu^Z?cZf^Pb|O>!AML`%HFx*~hsY4*9n zFXcS?ey>*XT$1Xmz*X^Eo3s7fqVb}y1g*3bV*B?@IbQb^)kCk-=iUNp&c4jIDZw)) z)!znM*;f;<_@1i;9j!jh=6kLKAQc}R4srw?NhClFUe2>B1ofjXo3Ai&YBoKnQ^%(1 z&w?@0J8eEo;#AOFjicywdLI6ve0JiLPd4G`&=Y_-e3Rx<)_6#ZuYKMpJADsUsB#5w zq2>xMzT8@$Qmy7z3#?MBgscrXY>h_S2DBxErTQXB=E^Dfdd&w%aGr`UD~^|}F5msp zo?*#z8M~$Vrt8GKx$uzZS0KYVYuIdVprTrZ{pU1ZYk~Sf5A9!I2(zY!9LZQ+eRY||_f;SZjIz%fk1b!)AU;g=r>AkAvM)bg_q^hJW_k}KnTEKv=z}GD z!5ljtbG5Xx?`u);u2M~{bTJkcJKpPOwn1lBOUxD%vsx}l32{Rd701{?lh15Y?%@c@ zn*>|VAi6;GBU&EJ1y0!Q&4es2$t`@i@;OD|>aMUC`^`yk>De@kWjpuFM> 
zfEZ#c=ry147tz~$#ibr(V2_8gu5Rm9v0HMzcmyV~gO89L$-YD=z1i^y8 z2O-PfgTN=|D=5T5KnDtxKr`pPU=x*5-=o)O=rjO4(d!Gf38(v!nO(kZUisSpcu;IF ztIDq|FWptXxoYRu=>?G1wD7-9fFi4EM|FjL2NrBWg$28e{_o)$|H?oM(X7>nvHt}e C?^8km