diff --git a/src/ecoprimer.c b/src/ecoprimer.c index 9651d67..45d2227 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -28,36 +28,6 @@ static int cmpprintedpairs(const void* p1,const void* p2); void* lib_handle = NULL; float (*calcMelTemp)(char*, char*); -void openlibman () -{ - // Open the library. - char* lib_name = "./libPHunterLib.dylib"; - lib_handle = dlopen(lib_name, RTLD_NOW); - if (lib_handle) { - fprintf(stderr, "[%s] dlopen(\"%s\", RTLD_NOW): Successful\n", __FILE__, lib_name); - } - else { - fprintf(stderr, "[%s] Unable to open library: %s\n", - __FILE__, dlerror()); - exit(EXIT_FAILURE); - } - - // Get the symbol addresses. - calcMelTemp = dlsym(lib_handle, "_Z27calculateMeltingTemperaturePcS_"); - if (calcMelTemp) { - fprintf(stderr, "[%s] dlsym(lib_handle, \"addRating\"): Successful\n", __FILE__); - } - else { - fprintf(stderr, "[%s] Unable to get symbol: %s\n", - __FILE__, dlerror()); - exit(EXIT_FAILURE); - } -} - -void closlibman () -{ - if (lib_handle) dlclose(lib_handle); -} /* ----------------------------------------------- */ /* printout help */ /* ----------------------------------------------- */ @@ -157,8 +127,9 @@ void initoptions(poptions_t options) options->no_multi_match=FALSE; options->pnparm = NULL; strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/ - options->saltmethod = SALT_METHOD_SANTALUCIA; + options->saltmethod = SALT_METHOD_SANTALUCIA; options->salt = DEF_SALT; + options->printAC=FALSE; } void printapair(int32_t index,ppair_t pair, poptions_t options) @@ -176,7 +147,7 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) uint32_t i; float temp; CNNParams nnparams; - + //nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA); char *c; @@ -215,25 +186,25 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) //print primer1 printf("%s\t", p1); - + //print primer2 printf("%s", p2); - + //print primer1 melting temperature - printf ("\t%4.3f", pair->p1temp); + printf ("\t%3.1f", pair->p1temp); //print minimum melting temperature of approximate versions of primer1 - printf ("\t%4.3f", pair->p1mintemp); - + printf ("\t%3.1f", pair->p1mintemp); + //print primer2 melting temperature - printf ("\t%4.3f", pair->p2temp); - + printf ("\t%3.1f", pair->p2temp); + //print minimum melting temperature of approximate versions of primer2 - printf ("\t%4.3f", pair->p2mintemp); - + printf ("\t%3.1f", pair->p2mintemp); + //print gc contents of primer1 printf ("\t%d",nparam_CountGCContent(p1)); - + //print gc contents of primer2 printf ("\t%d",nparam_CountGCContent(p2)); @@ -242,19 +213,19 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) //print inexample count printf("\t%d", pair->inexample); - + //print out example count printf("\t%d", pair->outexample); - + //print yule printf("\t%4.3f", pair->yule); //print in taxa count printf("\t%d", pair->intaxa); - + //print out taxa count printf("\t%d", pair->outtaxa); - + //print coverage printf("\t%4.3f", (float)pair->bc); @@ -266,10 +237,10 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) //print min amplifia lenght printf("\t%d", pair->mind); - + //print max amplifia lenght printf("\t%d", pair->maxd); - + //print average amplifia lenght printf("\t%3.2f", (float)pair->sumd/pair->inexample); @@ -392,7 +363,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) count=filterandsortpairs(sortedpairs,pairs->count,options); getThermoProperties(sortedpairs, count, options); - + fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count); printf("#\n"); @@ -454,7 +425,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) for (i=0;i < count;i++) printapair(i,sortedpairs[i],options); - + } @@ -508,6 +479,13 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options) } /* to get db stats, totals of species, genus etc....*/ +static void printAC(pecodnadb_t seqdb,uint32_t seqdbsize) +{ + uint32_t i; + + for (i=0; i< seqdbsize;i++) + printf("%15s : %s\n",seqdb[i]->AC,seqdb[i]->DE); +} int main(int argc, char **argv) { @@ -530,22 +508,11 @@ int main(int argc, char **argv) int32_t rankdbstats = 0; - //printcurrenttime(); - //return 0; - //openlibman (); - //float temp = calculateMeltingTemperature ("GGTCTGAACTCAGATCAC", "CTGTTTACCAAAAACATC"); - //float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC"); - //printf ("temp = %f\n", temp); - //return 0; - //char *t = "ACGT"; - //printf ("\nGETNUMCODE: A=%d, C=%d, G=%d, T=%d\n", GETNUMCODE(t[0]), GETNUMCODE('C'), GETNUMCODE('G'), GETNUMCODE('T')); - //printf ("\nGETREVCODE: A=%d, C=%d, G=%d, T=%d\n", GETREVCODE(t[0]), GETREVCODE('C'), GETREVCODE('G'), GETREVCODE('T')); - //return 0; CNNParams nnparams; - + initoptions(&options); - while ((carg = getopt(argc, argv, "hfvcUDSd:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) { + while ((carg = getopt(argc, argv, "hAfvcUDSd:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) { switch (carg) { /* ---------------------------- */ @@ -558,6 +525,12 @@ int main(int argc, char **argv) case 'f': /* set in single strand mode */ /* ---------------------------- */ options.filtering=FALSE; + break; + + /* ---------------------------- */ + case 'A': /* set in single strand mode */ + /* ---------------------------- */ + options.printAC=TRUE; break; /* -------------------- */ @@ -702,11 +675,11 @@ int main(int argc, char **argv) options.pnparm = &nnparams; if (options.saltmethod != 2) //if not SALT_METHOD_OWCZARZY options.saltmethod = SALT_METHOD_SANTALUCIA; //then force SALT_METHOD_SANTALUCIA - - + + if (options.salt < 0.01 || options.salt > 0.3) //if salt value out of literature values options.salt = DEF_SALT; //set to default - + nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,options.salt,options.saltmethod); fprintf(stderr,"Reading taxonomy database ..."); @@ -719,6 +692,11 @@ int main(int argc, char **argv) seqdb = readdnadb(options.prefix,&seqdbsize); + if (options.printAC) + { + printAC(seqdb,seqdbsize); + exit(0); + } if (options.reference) for (i=0; i < seqdbsize;i++) if (strcmp(seqdb[i]->AC,options.reference)==0) @@ -748,7 +726,7 @@ int main(int argc, char **argv) words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options); fprintf(stderr,"\n Strict primer count : %d\n",words->size); - + // options.filtering=FALSE; // words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options); // fprintf(stderr,"\n Strict primer count : %d\n",words2->size); diff --git a/src/libecoprimer/aproxpattern.c b/src/libecoprimer/aproxpattern.c index 170f00e..0cf349e 100644 --- a/src/libecoprimer/aproxpattern.c +++ b/src/libecoprimer/aproxpattern.c @@ -97,7 +97,8 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3 params.circular = options->circular; params.maxerr = options->error_max; - params.omask = (1 << options->strict_three_prime) -1; +// params.omask = (1 << options->strict_three_prime) -1; + params.omask = 0; params.patlen = options->primer_length; positions.val=NULL; diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h index 68b774e..a25c8ad 100644 --- a/src/libecoprimer/ecoprimer.h +++ b/src/libecoprimer/ecoprimer.h @@ -86,7 +86,8 @@ typedef struct { typedef union { uint32_t *pointer; uint32_t value; -} poslist_t, *ppostlist_t; +} poslist_t, *pposlist_t; + /** * primer_t structure store fuzzy match positions for a primer @@ -96,10 +97,10 @@ typedef union { typedef struct { word_t word; //< code for the primer uint32_t *directCount; //< Occurrence count on direct strand - ppostlist_t directPos; //< list of position list on direct strand + pposlist_t directPos; //< list of position list on direct strand uint32_t *reverseCount; //< Occurrence count on reverse strand - ppostlist_t reversePos; //< list of position list on reverse strand + pposlist_t reversePos; //< list of position list on reverse strand bool_t good; //< primer match more than quorum example and no // more counterexample quorum. @@ -191,7 +192,7 @@ typedef struct { // // uint32_t oktaxoncount; uint32_t curseqid; - float p1temp; //strict primer1 melting temperature + float p1temp; //strict primer1 melting temperature float p1mintemp; //approx primer1 minimum melting temperature float p2temp; //strict primer2 melting temperature float p2mintemp; //approx primer2 minimum melting temperature @@ -246,6 +247,7 @@ typedef struct { }taxontoamp_t, *ptaxontoamp_t; typedef struct { + bool_t printAC; bool_t statistics; bool_t filtering; uint32_t lmin; //**< Amplifia minimal length diff --git a/src/libthermo/Makefile b/src/libthermo/Makefile index 60ae686..1685cba 100644 --- a/src/libthermo/Makefile +++ b/src/libthermo/Makefile @@ -1,6 +1,6 @@ SOURCES = nnparams.c \ - thermostats.c + thermostats.c SRCS=$(SOURCES) diff --git a/src/libthermo/nnparams.c b/src/libthermo/nnparams.c index 047498c..8c26f70 100644 --- a/src/libthermo/nnparams.c +++ b/src/libthermo/nnparams.c @@ -3,7 +3,7 @@ * PHunterLib * * Nearest Neighbor Model / Parameters - * + * * Created by Tiayyba Riaz on 7/2/09. * */ @@ -18,7 +18,7 @@ float forbidden_entropy; -float nparam_GetInitialEntropy(PNNParams nparm) +float nparam_GetInitialEntropy(PNNParams nparm) { return -5.9f+nparm->rlogc; } @@ -53,9 +53,9 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1) if (nparm->saltMethod == SALT_METHOD_OWCZARZY) { float logk = log(nparm->kplus); answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk); - } + } return answer; -} +} /* PURPOSE: Return melting temperature TM for given entropy and enthalpy * Assuming a one-state transition and using the formula @@ -70,7 +70,7 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1) * * PARAMETERS: * entrypy and enthalpy -* +* * RETURN VALUE: * temperature */ @@ -78,7 +78,7 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1) float nparam_CalcTM(float entropy,float enthalpy) { float tm = 0; // absolute zero - return if model fails! - if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact)) + if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact)) return 0; if (entropy<0) // avoid division by zero and model errors! { @@ -93,7 +93,7 @@ float nparam_CalcTM(float entropy,float enthalpy) void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) { nparm->Ct1 = c1; - nparm->Ct2 = c2; + nparm->Ct2 = c2; nparm->kplus = kp; int maxCT = 1; if(nparm->Ct2 > nparm->Ct1) @@ -101,11 +101,11 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) maxCT = 2; } float ctFactor; - if(nparm->Ct1 == nparm->Ct2) + if(nparm->Ct1 == nparm->Ct2) { ctFactor = nparm->Ct1/2; } - else if (maxCT == 1) + else if (maxCT == 1) { ctFactor = nparm->Ct1-nparm->Ct2/2; } @@ -123,7 +123,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) memset(nparm->dH,0,sizeof(nparm->dH)); memset(nparm->dS,0,sizeof(nparm->dS)); - // Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small! + // Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small! for (x=1;x<=4;x++) { for (y=1;y<=4;y++) @@ -152,7 +152,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) ndS(5,x,y,0)=forbidden_entropy; ndH(0,x,y,5)=forbidden_enthalpy; ndS(0,x,y,5)=forbidden_entropy; - + } // also, forbid x-/-- and --/x-, i.e. no two inner gaps paired ndH(x,0,0,0)=forbidden_enthalpy; @@ -212,7 +212,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) // x-/ya abd xa/y- as well as -x/ay and ax/-y // bulge opening and closing parameters with // adjacent matches / mismatches - // obulge_mism and cbulge_mism chosen so high to avoid + // obulge_mism and cbulge_mism chosen so high to avoid // AAAAAAAAA // T--G----T // being better than @@ -474,10 +474,10 @@ void nparam_CleanSeq (char* inseq, char* outseq, int len) { int seqlen = strlen (inseq); int i, j; - + if (len != 0) seqlen = len; - + for (i = 0, j = 0; i < seqlen; i++) { switch (inseq[i]) @@ -517,20 +517,61 @@ float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len) unsigned int i; char nseq[50]; char *useq = seq; - + nparam_CleanSeq (seq, nseq, len); useq = nseq; - - int slen = strlen(useq); - - //fprintf (stderr,"Primer : %s\n",useq); - for ( i=1;idH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2); + thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2); + } +// printf("------------------\n"); + mtemp = nparam_CalcTM(thedS,thedH); + //if (mtemp == 0) + //{ + // fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq); + //exit (0); + //} + return mtemp; +} + +float nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len) +{ + float thedH = 0; + //float thedS = nparam_GetInitialEntropy(nparm); + float thedS = -5.9f+nparm->rlogc; + float mtemp; + char c1; + char c2; + char c3; + char c4; + unsigned int i; + char nseq1[50]; + char nseq2[50]; + char *useq1; + char *useq2; + + nparam_CleanSeq (seq1, nseq1, len); + useq1 = nseq1; + + nparam_CleanSeq (seq2, nseq2, len); + useq2 = nseq2; + + //fprintf (stderr,"Primer : %s\n",useq); + for ( i=1;idH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2); @@ -550,7 +591,7 @@ float calculateMeltingTemperatureBasic (char * seq) { int gccount; float temp; int seqlen; - + seqlen = strlen (seq); gccount = nparam_CountGCContent (seq); temp = 64.9 + 41*(gccount - 16.4)/seqlen; diff --git a/src/libthermo/nnparams.h b/src/libthermo/nnparams.h index 5c0cb36..8344f51 100644 --- a/src/libthermo/nnparams.h +++ b/src/libthermo/nnparams.h @@ -3,7 +3,7 @@ * PHunterLib * * Nearest Neighbor Model Parameters - * + * * Created by Tiayyba Riaz on 02/07/09. * */ @@ -27,8 +27,8 @@ #define DEF_CONC_SEQUENCES 0 #define DEF_SALT 0.05 -#define GETNUMCODE(a) bpencoder[a - 'A'] -#define GETREVCODE(a) 5-bpencoder[a - 'A'] +#define GETNUMCODE(a) bpencoder[a - 'A'] +#define GETREVCODE(a) 5-bpencoder[a - 'A'] extern float forbidden_entropy; @@ -43,7 +43,7 @@ static char bpencoder[] = { 1, // A 0,0,0,0,0}; // v,w,x,y,z -typedef struct CNNParams_st +typedef struct CNNParams_st { float Ct1; float Ct2; @@ -63,8 +63,10 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1); float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1); float nparam_CalcTM(float entropy,float enthalpy); float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len); +float nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len); + float nparam_GetInitialEntropy(PNNParams nparm) ; float calculateMeltingTemperatureBasic (char * seq); //void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options); -#endif +#endif diff --git a/src/libthermo/thermostats.c b/src/libthermo/thermostats.c index ecd57d1..03f4fc3 100644 --- a/src/libthermo/thermostats.c +++ b/src/libthermo/thermostats.c @@ -4,18 +4,44 @@ #include #include "thermostats.h" +static word_t extractSite(char* sequence, + size_t begin, size_t length, bool_t strand) +{ + char *c; + char *start; + uint32_t l; + word_t site = 0; + + start=sequence+begin; + if (!strand) + start+=length-1; + + + for (c=start, + l=0; + lp1->word; @@ -27,38 +53,62 @@ void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options) if (!pairs[i]->asdirect2) w2=ecoComplementWord(w2,options->primer_length); - pairs[i]->p1temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(w1, options->primer_length), 0) - 273.0; - pairs[i]->p2temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(w2, options->primer_length), 0) - 273.0; + strncpy(prmrd,ecoUnhashWord(w1, options->primer_length),options->primer_length); + strncpy(prmrr,ecoUnhashWord(w2, options->primer_length),options->primer_length); + prmrd[options->primer_length]=0; + prmrr[options->primer_length]=0; + pairs[i]->p1temp = nparam_CalcSelfTM (options->pnparm, prmrd, options->primer_length) - 273.0; + pairs[i]->p2temp = nparam_CalcSelfTM (options->pnparm, prmrr, options->primer_length) - 273.0; pairs[i]->p1mintemp = 100; pairs[i]->p2mintemp = 100; for (j = 0; j < pairs[i]->pcr.ampcount; j++) { sq = pairs[i]->pcr.amplifias[j].sequence->SQ; - begin = pairs[i]->pcr.amplifias[j].begin - options->primer_length; - end = pairs[i]->pcr.amplifias[j].end + 1; - - memcpy (prmr, sq + begin, options->primer_length); - mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0; - if (mtemp < pairs[i]->p1mintemp) - pairs[i]->p1mintemp = mtemp; - //fprintf (stderr, "prmr1: %s\n", seqdb[seqid]->SQ); - memcpy (prmr, sq + end, options->primer_length); - mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0; + strand = pairs[i]->pcr.amplifias[j].strand; + bp1 = pairs[i]->pcr.amplifias[j].begin - options->primer_length; + bp2 = pairs[i]->pcr.amplifias[j].end + 1; + + if (!strand) + { + uint32_t tmp; + tmp=bp1; + bp1=bp2; + bp2=tmp; + } + +// printf("%s : %s, %c",prmrd, +// ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length), +// "rd"[strand]); + mtemp = nparam_CalcTwoTM(options->pnparm, + prmrd, + ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length), + options->primer_length) - 273.0; +// printf(" %4.2f %4.2f\n",pairs[i]->p1temp,mtemp); + if (mtemp < pairs[i]->p1mintemp) + pairs[i]->p1mintemp = mtemp; + +// printf("%s : %s, %c\n",prmrr,ecoUnhashWord(extractSite(sq,bp2,options->primer_length,!strand),options->primer_length), +// "rd"[strand]); +// + mtemp = nparam_CalcTwoTM(options->pnparm, + prmrr, + ecoUnhashWord(extractSite(sq,bp2,options->primer_length,!strand),options->primer_length), + options->primer_length) - 273.0; if (mtemp < pairs[i]->p2mintemp) pairs[i]->p2mintemp = mtemp; } - + if (w2 < w1) { mtemp = pairs[i]->p1temp; pairs[i]->p1temp = pairs[i]->p2temp; pairs[i]->p2temp = mtemp; - + mtemp = pairs[i]->p1mintemp; pairs[i]->p1mintemp = pairs[i]->p2mintemp; pairs[i]->p2mintemp = mtemp; } - + } -} \ No newline at end of file +}