diff --git a/src/ecoprimer.c b/src/ecoprimer.c index 3f481b0..75ed821 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -6,7 +6,6 @@ */ #include "libecoprimer/ecoprimer.h" -#include "libthermo/libthermo.h" #include #include #include @@ -15,6 +14,9 @@ #include #include #include +#include"libthermo/nnparams.h" +#include"libthermo/thermostats.h" + #define VERSION "0.3" /* TR: by default, statistics are made on species level*/ @@ -153,37 +155,10 @@ void initoptions(poptions_t options) options->r=0; options->g=0; options->no_multi_match=FALSE; + options->pnparm = NULL; strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/ } -void printcurrenttime () -{ - time_t now; - struct tm *ts; - char buf[80]; - - /* Get the current time */ - now = time(NULL); - - /* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */ - ts = localtime(&now); - strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts); - fprintf(stderr,"#%d#, %s\n",(int)now, buf); - } - -void printcurrenttimeinmilli() -{ - struct timeval tv; - struct timezone tz; - struct tm *tm; - gettimeofday(&tv, &tz); - tm=localtime(&tv.tv_sec); - fprintf(stderr, " %d:%02d:%02d %d %d \n", tm->tm_hour, tm->tm_min, - tm->tm_sec, tv.tv_usec, tv.tv_usec/1000); -} - -/*TR: Added*/ - void printapair(int32_t index,ppair_t pair, poptions_t options) { bool_t asdirect1=pair->asdirect1; @@ -198,6 +173,9 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) bool_t strand; uint32_t i; float temp; + CNNParams nnparams; + + //nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA); char *c; char p1[32]; @@ -225,6 +203,7 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) good2=goodtmp; } + //print serial number printf("%6d\t",index); c = ecoUnhashWord(w1,options->primer_length); @@ -232,57 +211,67 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) c = ecoUnhashWord(w2,options->primer_length); strcpy (p2, c); + //print primer1 printf("%s\t", p1); + + //print primer2 printf("%s", p2); - /*For thermo: first word should be 5'-3'*/ - /*if (!asdirect1) - { - wtmp = ecoComplementWord(w1,options->primer_length); - c = ecoUnhashWord(wtmp,options->primer_length); - strcpy (p1, c); - }*/ - /*For thermo: first word should be 5'-3'*/ - /*if (asdirect2) - { - wtmp = ecoComplementWord(w2,options->primer_length); - c = ecoUnhashWord(wtmp,options->primer_length); - strcpy (p2, c); - }*/ + //print primer1 melting temperature + printf ("\t%4.3f", pair->p1temp); - /*temp = calculateMeltingTemperature (p1, p2);*/ - //float temp = calculateMeltingTemperature ("CTGTTTACCAAAAACATC", "GGTCTGAACTCAGATCAC"); - - temp = calculateMeltingTemperatureBasic(p1); - printf ("\t%4.3f", temp); - - temp = calculateMeltingTemperatureBasic(p2); - printf ("\t%4.3f", temp); - - printf ("\t%d",countGCContent(p1)); - - printf ("\t%d",countGCContent(p2)); + //print minimum melting temperature of approximate versions of primer1 + printf ("\t%4.3f", pair->p1mintemp); + + //print primer2 melting temperature + printf ("\t%4.3f", pair->p2temp); + + //print minimum melting temperature of approximate versions of primer2 + printf ("\t%4.3f", pair->p2mintemp); + + //print gc contents of primer1 + printf ("\t%d",nparam_CountGCContent(p1)); + + //print gc contents of primer2 + printf ("\t%d",nparam_CountGCContent(p2)); + //print good/bad pair indicator printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]); - + //print inexample count printf("\t%d", pair->inexample); + + //print out example count printf("\t%d", pair->outexample); + + //print yule printf("\t%4.3f", pair->yule); + //print in taxa count printf("\t%d", pair->intaxa); + + //print out taxa count printf("\t%d", pair->outtaxa); + + //print coverage printf("\t%4.3f", (float)pair->bc); + //print well identified taxa count printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa); + //print specificity printf("\t%4.3f", pair->bs); - + //print min amplifia lenght printf("\t%d", pair->mind); + + //print max amplifia lenght printf("\t%d", pair->maxd); + + //print average amplifia lenght printf("\t%3.2f", (float)pair->sumd/pair->inexample); + //print amplifia information about reference sequence if specified if (options->refseq && pair->refsequence >=0) { printf("\t%s:",options->reference); @@ -351,31 +340,21 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti qfp = (float)sortedpairs[i]->outexample/options->outsamples; else qfp=0.0; - sortedpairs[i]->quorumin = q; sortedpairs[i]->quorumout = qfp; sortedpairs[i]->yule = q - qfp; - - sortedpairs[j]=sortedpairs[i]; - - if (q > options->sensitivity_quorum && qfp < options->false_positive_quorum) { (void)taxonomycoverage(sortedpairs[j],options); taxonomyspecificity(sortedpairs[j]); -// fprintf(stderr,"%4.3f %4.3f %4.3f\n",sortedpairs[j]->yule , sortedpairs[j]->bs,sortedpairs[j]->bc); - j++; } } - - qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs); - return j; } @@ -410,7 +389,8 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) sortedpairs[j]=pl->pairs+i; count=filterandsortpairs(sortedpairs,pairs->count,options); - + getThermoProperties(sortedpairs, count, options); + fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count); printf("#\n"); @@ -477,7 +457,6 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) } - /*updateseqparams: This function counts the insample and outsample sequences * and with each sequences adds a tag of the taxon to which the sequence beongs*/ @@ -556,6 +535,12 @@ int main(int argc, char **argv) //float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC"); //printf ("temp = %f\n", temp); //return 0; + //char *t = "ACGT"; + //printf ("\nGETNUMCODE: A=%d, C=%d, G=%d, T=%d\n", GETNUMCODE(t[0]), GETNUMCODE('C'), GETNUMCODE('G'), GETNUMCODE('T')); + //printf ("\nGETREVCODE: A=%d, C=%d, G=%d, T=%d\n", GETREVCODE(t[0]), GETREVCODE('C'), GETREVCODE('G'), GETREVCODE('T')); + //return 0; + CNNParams nnparams; + nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA); initoptions(&options); @@ -701,7 +686,7 @@ int main(int argc, char **argv) } } - + options.pnparm = &nnparams; fprintf(stderr,"Reading taxonomy database ..."); taxonomy = read_taxonomy(options.prefix,0); fprintf(stderr,"Ok\n"); @@ -741,7 +726,7 @@ int main(int argc, char **argv) words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options); fprintf(stderr,"\n Strict primer count : %d\n",words->size); - + // options.filtering=FALSE; // words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options); // fprintf(stderr,"\n Strict primer count : %d\n",words2->size); @@ -789,17 +774,10 @@ int main(int argc, char **argv) fprintf(stderr,"\n"); - /*TR: Added*/ pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options); - //fprintf(stderr,"buildPrimerPairs finished\n"); - // setoktaxforspecificity (&pairs); - printpairs (pairs, &options,taxonomy); // closlibman (); - - //ECOFREE(pairs.pairs,"Free pairs table"); - return 0; } diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h index 0cf3198..f226a01 100644 --- a/src/libecoprimer/ecoprimer.h +++ b/src/libecoprimer/ecoprimer.h @@ -13,6 +13,7 @@ #include #include "ecotype.h" #include "../libecoPCR/ecoPCR.h" +#include "../libthermo/nnparams.h" #include "apat.h" #define DEBUG @@ -190,7 +191,10 @@ typedef struct { // // uint32_t oktaxoncount; uint32_t curseqid; - + float p1temp; //strict primer1 melting temperature + float p1mintemp; //approx primer1 minimum melting temperature + float p2temp; //strict primer2 melting temperature + float p2mintemp; //approx primer2 minimum melting temperature } pair_t, *ppair_t; /*TR: Added*/ @@ -274,6 +278,7 @@ typedef struct { int32_t outsamples; int32_t intaxa; int32_t outtaxa; + PNNParams pnparm; } options_t, *poptions_t; typedef ecoseq_t **pecodnadb_t; @@ -338,5 +343,6 @@ void taxonomyspecificity (ppair_t pair); int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize, uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum); +void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize); #endif /* EPSORT_H_ */ diff --git a/src/libecoprimer/pairs.c b/src/libecoprimer/pairs.c index 8f2df4c..24b6dde 100644 --- a/src/libecoprimer/pairs.c +++ b/src/libecoprimer/pairs.c @@ -156,16 +156,14 @@ ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t { uint32_t i; ppairtree_t primerpairs; - + primerpairs = initpairtree(NULL); for (i=0; i < seqdbsize; i++) { buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options); } - return primerpairs; - } #define DMAX (2000000000) @@ -187,7 +185,11 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid, bool_t bswp; size_t distance; bool_t strand; - + //char prmr[50]; + //float mtemp; + + //prmr[options->primer_length] = '\0'; + for (i=0;i < primers->size; i++) { matchcount+=primers->primers[i].directCount[seqid]; @@ -272,10 +274,12 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid, current.outexample=0; current.curseqid = 0; current.refsequence=-1; - + //current.p1temp = 100; + //current.p1mintemp = 100; + //current.p2temp = 100; + //current.p2mintemp = 100; // Standardize the pair - strand = current.p2->word > current.p1->word; if (!strand) { @@ -309,6 +313,10 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid, //else // pcurrent->outexample++; + //for each pair we save current sequence id in the pair + //when we see this pair for the first time in currnet sequence + //because we want to increment inexample & outexample count + //only once for one sequence if (pcurrent->curseqid != (seqid+1)) { if (seqdb[seqid]->isexample) @@ -359,6 +367,23 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid, else pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ; + + /*strncpy (prmr, seqdb[seqid]->SQ + matches[i].position, options->primer_length); + mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0; + if (mtemp < pcurrent->p1mintemp) + pcurrent->p1mintemp = mtemp; + //fprintf (stderr, "prmr1: %s\n", seqdb[seqid]->SQ); + strncpy (prmr, seqdb[seqid]->SQ + matches[j].position, options->primer_length); + mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0; + if (mtemp < pcurrent->p2mintemp) + pcurrent->p2mintemp = mtemp; + //fprintf (stderr, "prmr2: %s\n", prmr); + + if (pcurrent->p1temp == 100) + pcurrent->p1temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p1->word, options->primer_length), 0) - 273.0; + if (pcurrent->p2temp == 100) + pcurrent->p2temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p2->word, options->primer_length), 0) - 273.0; + */ pcurrent->pcr.ampcount++; // fprintf(stderr,"%c%c W1 : %s direct : %c", // "bG"[(int)pcurrent->p1->good], diff --git a/src/libecoprimer/readdnadb.c b/src/libecoprimer/readdnadb.c index a148510..98867dd 100644 --- a/src/libecoprimer/readdnadb.c +++ b/src/libecoprimer/readdnadb.c @@ -33,3 +33,18 @@ pecodnadb_t readdnadb(const char *name, uint32_t *size) return db; } + + +void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize) +{ + uint32_t i; + char ch[11]; + ch [10] = '\0'; + + for (i=0; i < seqdbsize; i++) + { + strncpy (ch, seqdb[i]->SQ, 10); + fprintf (stderr, "seq %d = %s\n", i, ch); + } + exit (0); +} \ No newline at end of file diff --git a/src/libthermo/Makefile b/src/libthermo/Makefile index fefcbc7..60ae686 100644 --- a/src/libthermo/Makefile +++ b/src/libthermo/Makefile @@ -1,9 +1,6 @@ -SOURCES = dinkelbach.c \ - galign.c \ - libthermo.c \ - nnparams.c \ - thermalign.c +SOURCES = nnparams.c \ + thermostats.c SRCS=$(SOURCES) diff --git a/src/libthermo/libthermo.a b/src/libthermo/libthermo.a index 00900a2..cbedd26 100644 Binary files a/src/libthermo/libthermo.a and b/src/libthermo/libthermo.a differ diff --git a/src/libthermo/nnparams.c b/src/libthermo/nnparams.c index 71570bf..a7ed186 100644 --- a/src/libthermo/nnparams.c +++ b/src/libthermo/nnparams.c @@ -2,68 +2,101 @@ * nnparams.cpp * PHunterLib * + * Nearest Neighbor Model / Parameters + * * Created by Tiayyba Riaz on 7/2/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. * */ -//============================================================================= -// Module: nnparams.cpp -// Project: Diploma Thesis - Probe Selection for DNA Microarrays -// Type: implementation - Nearest Neighbor Model / Parameters -// Language: c++ -// Compiler: microsoft visual c++ 6.0, unix/linux gcc -// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) -// Database: none -// Description: class CNNParams - Nearest Neighbor Model / Parameters -// Author: kaderali -// Date: 12/2000 -// Copyright: (c) L. Kaderali, 9/2000 - 12/2000 -// -// Revision History -// $ 00sep07 LK : created -// 00dec17 LK : modified to use new nn parameters -// 00dec29 LK : modified to include dangling end parameters -// 01jan09 LK : included CalcSelfTM -// 01feb07 LK : optimized -// #$ -//============================================================================= - #include #include +#include #include #include"nnparams.h" -#ifdef _pack -#pragma pack(1) -#endif - - float forbidden_entropy; -/////////////////////////////////////////////////////////////////////////////// -// Construction/Destruction -/////////////////////////////////////////////////////////////////////////////// + +float nparam_GetInitialEntropy(PNNParams nparm) +{ + return -5.9f+nparm->rlogc; +} -/////////////////////////////////////////////////////////////////////////////// -// FUNCTION: void CNNParams::InitParams() -// -// PURPOSE: Initialize nearest neighbor parameters. For now, simply set -// params as requiered. Extend to read from file sometime. -// -// PARAMETERS: -// none -// -// RETURN VALUE: -// void -// -// REVISION HISTORY -// $ 00sep06 : created LK -// 00dec17 : modified to use new parameters LK -// 00dec29 : included dangling end data LK -// #$ +//Retrieve Enthalpy for given NN-Pair from parameter table +float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1) +{ + return ndH(x0,x1,y0,y1); //xx, yx are already numbers +} + + +//Retrieve Entropy for given NN-Pair from parameter table +float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1) +{ + //xx and yx are already numbers + char nx0=x0;//nparam_convertNum(x0); + char nx1=x1;//nparam_convertNum(x1); + char ny0=y0;//nparam_convertNum(y0); + char ny1=y1;//nparam_convertNum(y1); + float answer = ndS(nx0,nx1,ny0,ny1); + /*Salt correction Santalucia*/ + if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) { + if(nx0!=5 && 1<= nx1 && nx1<=4) { + answer += 0.5*nparm->kfac; + } + if(ny1!=5 && 1<= ny0 && ny0<=4) { + answer += 0.5*nparm->kfac; + } + } + /*Salt correction Owczarzy*/ + if (nparm->saltMethod == SALT_METHOD_OWCZARZY) { + float logk = log(nparm->kplus); + answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk); + } + return answer; +} + +/* PURPOSE: Return melting temperature TM for given entropy and enthalpy +* Assuming a one-state transition and using the formula +* TM = dH / (dS + R ln(Ct/4)) +* entropy = dS + R ln Ct/4 (must already be included!) +* enthaklpy = dH +* where +* dH = enthalpy +* dS = entropy +* R = Boltzmann factor +* Ct = Strand Concentration +* +* PARAMETERS: +* entrypy and enthalpy +* +* RETURN VALUE: +* temperature +*/ + +float nparam_CalcTM(float entropy,float enthalpy) +{ + float tm = 0; // absolute zero - return if model fails! + if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact)) + return 0; + if (entropy<0) // avoid division by zero and model errors! + { + tm = enthalpy/entropy;// - kfac; //LKFEB + if (tm<0) + return 0; + } + return tm; +} + +/* PURPOSE: Initialize nearest neighbor parameters. +* +* PARAMETERS: +* none +* +* RETURN VALUE: +* void +*/ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) { @@ -433,23 +466,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) return; } -void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2) -{ - float l1 = strlen(s1); - float l2 = strlen(s2); - if(l1gcContent = nparam_CountGCContent(s1); - nparm->gcContent /= (l1-2); - } else if (l1>l2) { - nparm->gcContent = nparam_CountGCContent(s2); - nparm->gcContent /= (l2-2); - } else { - nparm->gcContent = nparam_CountGCContent(s1)+nparam_CountGCContent(s2); - nparm->gcContent /= (l1+l2-4); - } -} - -float nparam_CountGCContent(char * seq ) { +int nparam_CountGCContent(char * seq ) { int lseq = strlen(seq); int k; float count = 0; @@ -461,352 +478,89 @@ float nparam_CountGCContent(char * seq ) { return count; } -/////////////////////////////////////////////////////////////////////////////// -// FUNCTION: static char CNNParams::getComplement(char mychar) -// -// PURPOSE: return Watson-Crick complement to mychar -// -// PARAMETERS: -// mychar - Character to complement -// -// RETURN VALUE: -// char - complement to mychar -// -// REVISION HISTORY -// $ 00sep28 : created LK -// #$ - -char nparam_getComplement(char mychar, int asnum) +void nparam_CleanSeq (char* inseq, char* outseq, int len) { - if (mychar==1) // A -> T - return 4; - if (mychar==2) // C -> G - return 3; - if (mychar==3) // G -> T - return 2; - if (mychar==4) // T -> A - return 1; - if (mychar==5) // $ -> $ - return 5; - if (!asnum) + int seqlen = strlen (inseq); + int i, j; + + if (len != 0) + seqlen = len; + + for (i = 0, j = 0; i < seqlen; i++) { - if (mychar=='A') // A -> T - return 'T'; - if (mychar=='C') // C -> G - return 'G'; - if (mychar=='G') // G -> T - return 'C'; - if (mychar=='T') // T -> A - return 'A'; - if (mychar=='$') // $ -> $ - return '$'; + switch (inseq[i]) + { + case 'a': + case '\0': + case 'A': + outseq[j++] = 'A'; break; + case 'c': + case '\1': + case 'C': + outseq[j++] = 'C'; break; + case 'g': + case '\2': + case 'G': + outseq[j++] = 'G'; break; + case 't': + case '\3': + case 'T': + outseq[j++] = 'T'; break; + } } - else - { - if (mychar=='A') // A -> T - return 4; - if (mychar=='C') // C -> G - return 3; - if (mychar=='G') // G -> T - return 2; - if (mychar=='T') // T -> A - return 1; - if (mychar=='$') // $ -> $ - return 5; - } - return '*'; + outseq[j] = '\0'; } - - -/////////////////////////////////////////////////////////////////////////////// -// FUNCTION: static bool CNNParams::isMismatch(char,char) -// -// PURPOSE: Return true if char1 and char2 are not watson-crick pair -// -// PARAMETERS: -// char1 - first character -// char2 - second character -// -// RETURN VALUE: -// bool - true if char1,char2 are Watson-Crick, false otw -// -// REVISION HISTORY -// $ 00sep28 : created LK -// #$ - -int nparam_isMismatch(char c1,char c2) +//Calculate TM for given sequence against its complement +float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len) { - if (c1+c2==5) - return 0; - else - return 1; + float thedH = 0; + //float thedS = nparam_GetInitialEntropy(nparm); + float thedS = -5.9f+nparm->rlogc; + float mtemp; + char c1; + char c2; + char c3; + char c4; + unsigned int i; + char nseq[50]; + char *useq = seq; + + nparam_CleanSeq (seq, nseq, len); + useq = nseq; + + int slen = strlen(useq); + + //fprintf (stderr,"Primer : %s\n",useq); + for ( i=1;idH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2); + thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2); + } + //fprintf(stderr,"------------------\n"); + mtemp = nparam_CalcTM(thedS,thedH); + //if (mtemp == 0) + //{ + // fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq); + //exit (0); + //} + return mtemp; } - - char nparam_convertNum(char c) - { - if (c=='A') - return 1; - if (c=='C') - return 2; - if (c=='G') - return 3; - if (c=='T') - return 4; - if (c=='$') - return 5; - return 0; - } - - /////////////////////////////////////////////////////////////////////////////// - // inline function - /////////////////////////////////////////////////////////////////////////////// - // FUNCTION: float CNNParams::GetEntropy(char,char,char,char) - // - // PURPOSE: Retrieve Entropy for given NN-Pair from parameter table - // - // PARAMETERS: x0,x1,y0,y1: Pairs to look up in form - // 5'-x0-x1-3' / 3'-y0-y1-5' - // - // RETURN VALUE: - // float: Entropy dS for given NN pair - // - // REVISION HISTORY - // $ 00sep06 : created LK - // 00dec29 : included dangling end parameters - // 01feb07 : rewritten. looks ugly now, but is FAST! inline. - // #$ - - float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1) - { - char nx0=nparam_convertNum(x0); - char nx1=nparam_convertNum(x1); - char ny0=nparam_convertNum(y0); - char ny1=nparam_convertNum(y1); - float answer = ndS(nx0,nx1,ny0,ny1); - //Salt correction Santalucia - if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) { - if(nx0!=5 && 1<= nx1 && nx1<=4) { - answer += 0.5*nparm->kfac; - } - if(ny1!=5 && 1<= ny0 && ny0<=4) { - answer += 0.5*nparm->kfac; - } - } - //Salt correction Owczarzy - if (nparm->saltMethod == SALT_METHOD_OWCZARZY) { - float logk = log(nparm->kplus); - answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk); - } - return answer; - } +float calculateMeltingTemperatureBasic (char * seq) { + int gccount; + float temp; + int seqlen; - - /////////////////////////////////////////////////////////////////////////////// - // inline function - /////////////////////////////////////////////////////////////////////////////// - // FUNCTION: float CNNParams::GetEnthalpy(char,char,char,char) - // - // PURPOSE: Retrieve Enthalpy for given NN-Pair from parameter table - // - // PARAMETERS: x0,x1,y0,y1: Pairs to look up in form - // 5'-x0-x1-3' / 3'-y0-y1-5' - // - // RETURN VALUE: - // float: Enthalpy dH for given NN pair - // - // REVISION HISTORY - // $ 00sep06 : created LK - // $ 00dec29 : included dangling end parameters - // $ 01feb07 : rewritten. looks ugly now, but is FAST! inline. - // #$ - - float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1) - { - char nx0=nparam_convertNum(x0); - char nx1=nparam_convertNum(x1); - char ny0=nparam_convertNum(y0); - char ny1=nparam_convertNum(y1); - return ndH(nx0,nx1,ny0,ny1); - } - - - - /////////////////////////////////////////////////////////////////////////////// - // inline function - /////////////////////////////////////////////////////////////////////////////// - // FUNCTION: float CNNParams::CalcTM(float entropy,float enthalpy) - // - // PURPOSE: Return melting temperature TM for given entropy and enthalpy - // Assuming a one-state transition and using the formula - // TM = dH / (dS + R ln(Ct/4)) - // entropy = dS + R ln Ct/4 (must already be included!) - // enthaklpy = dH - // where - // dH = enthalpy - // dS = entropy - // R = Boltzmann factor - // Ct = Strand Concentration - // - // PARAMETERS: - // - // RETURN VALUE: - // - // REVISION HISTORY - // $ 00sep06 : created LK - // $ 01jan07 : modified and corrected - // $ 01feb07 : optimized!!! inline - // $ 01feb09 : changed to include r ln ct in entropy!!! - // #$ - - float nparam_CalcTM(float entropy,float enthalpy) - { - float tm = 0; // absolute zero - return if model fails! - if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact)) - return 0; - if (entropy<0) // avoid division by zero and model errors! - { - tm = enthalpy/entropy;// - kfac; //LKFEB - if (tm<0) - return 0; - } - return tm; - } - - - - /////////////////////////////////////////////////////////////////////////////// - // inline function - /////////////////////////////////////////////////////////////////////////////// - // FUNCTION: void CNNParams::AlterTM(float) - // - // PURPOSE: TM can be altered by a new assignment - // - // PARAMETERS: tm_new: new value for TM - // - // RETURN VALUE: - // - // REVISION HISTORY - // - // #$ - - void nparam_AlterTM(PNNParams nparm, float tm_new) - { - nparm->new_TM = tm_new; - return; - } - - - - /////////////////////////////////////////////////////////////////////////////// - // inline function - /////////////////////////////////////////////////////////////////////////////// - // FUNCTION: float CNNParams::CalcG(float entropy, float enthalpy) - // - // PURPOSE: return free energy G for given entropy and enthalpy - // Assuming a one-state transition and using the formula - // G = dH - new_TM * dS - // dH = enthalpy - // dS = entropy - // new_TM = value for the optimal melting temperature of - // the last iteration - // - // PARAMETERS: entropy and enthalpy - // - // RETURN VALUE: free Energy value (dG) for the - // - // REVISION HISTORY - // - // #$ - - float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy) - { - float freeEnergy = -999999999; - if (enthalpy>=forbidden_enthalpy) - return -999999999; - if (entropy<0) // avoid division by zero and model errors! - { - entropy = entropy * -1; - enthalpy = enthalpy * -1; - freeEnergy = enthalpy - nparm->new_TM * entropy; - } - return freeEnergy; - } - - - - - /////////////////////////////////////////////////////////////////////////////// - // inline function - /////////////////////////////////////////////////////////////////////////////// - // FUNCTION: float CNNParams::CalcSelfTM(char*) - // - // PURPOSE: Calculate TM for given sequence against its complement - // - // PARAMETERS: - // char* - Sequence to consider - // - // RETURN VALUE: - // float - Melting temperature in degrees Kelvin - // - // REVISION HISTORY - // $ 01jan09 LK : created - // $ 01feb07 LK : inline. - // #$ - - float nparam_CalcSelfTM(PNNParams nparm, char* seq) - { - float thedH = 0; - float thedS = nparam_GetInitialEntropy(nparm); - char c1; - char c2; - char c3; - char c4; - unsigned int i; - for ( i=1;i5) - { - if (c3=='A') - c3=1; - if (c3=='C') - c3=2; - if (c3=='G') - c3=3; - if (c3=='T') - c3=4; - if (c3=='$') - c3=5; - } - if (c4>5) - { - if (c4=='A') - c4=1; - if (c4=='C') - c4=2; - if (c4=='G') - c4=3; - if (c4=='T') - c4=4; - if (c4=='$') - c4=5; - } - - thedH += nparam_GetEnthalpy(nparm, c3,c4,c1,c2); - thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2); - } - - return nparam_CalcTM(thedS,thedH); - } - - float nparam_GetInitialEntropy(PNNParams nparm) - { - return -5.9f+nparm->rlogc; - } - - + seqlen = strlen (seq); + gccount = nparam_CountGCContent (seq); + temp = 64.9 + 41*(gccount - 16.4)/seqlen; + return temp; +} diff --git a/src/libthermo/nnparams.h b/src/libthermo/nnparams.h index 01948c7..5c0cb36 100644 --- a/src/libthermo/nnparams.h +++ b/src/libthermo/nnparams.h @@ -2,72 +2,49 @@ * nnparams.h * PHunterLib * - * Created by Tiayyba Riaz on 7/2/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. + * Nearest Neighbor Model Parameters + * + * Created by Tiayyba Riaz on 02/07/09. * */ -//============================================================================= -// Module: nnparams.h -// Project: Diploma Thesis - Probe Selection for DNA Microarrays -// Type: header file - Nearest Neighbor Parameters / Model. -// Language: c++ -// Compiler: microsoft visual c++ 6.0, unix/linux gcc -// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) -// Database: none -// Description: class CNNParams - Nearest Neighbor Model Parameters -// Author: kaderali -// Date: 9-12/2000 -// Copyright: (c) L. Kaderali, 9/2000 - 12/2000 -// -// Revision History -// $ 00sep07 LK : created -// 00dec29 LK : changed to include dangling end data -// 01jan09 LK : included CalcSelfTM function -// 01feb07 LK : optimized -// #$ -//============================================================================= - -#if !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_) -#define AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_ - -#if _MSC_VER > 1000 -#pragma once -#endif // _MSC_VER > 1000 +#ifndef NNPARAMS_H_ +#define NNPARAMS_H_ #include #include - -#ifdef _pack -#pragma pack(1) -#endif - +//#include "../libecoprimer/ecoprimer.h" // following defines to simplify coding... #define ndH(a,b,c,d) nparm->dH[a][b][c][d] #define ndS(a,b,c,d) nparm->dS[a][b][c][d] #define forbidden_enthalpy 1000000000000000000.0f -//#define forbidden_enthalpy_div1000 1000000000000000.0f -// forbidden entropy=-rlogc -// #define forbidden_entropy 30.205986374220235304486574573422f -// Boltzmann factor (cal/degrees C*mol) #define R 1.987f #define SALT_METHOD_SANTALUCIA 1 #define SALT_METHOD_OWCZARZY 2 -// Strand concentration (assumption!) (M) -// #define Ct 0.000001f -// r*ln(ct/4) as required by many formulas -//#define rlogc -30.205986374220235304486574573422f + +#define DEF_CONC_PRIMERS 0.0000008 +#define DEF_CONC_SEQUENCES 0 +#define DEF_SALT 0.05 + +#define GETNUMCODE(a) bpencoder[a - 'A'] +#define GETREVCODE(a) 5-bpencoder[a - 'A'] + + extern float forbidden_entropy; -//----------------------------------------------------------------------------- -// class CNNParams -//typedef class CNNParams* PNNParams; +static char bpencoder[] = { 1, // A + 0, // b + 2, // C + 0,0,0, // d, e, f + 3, // G + 0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s + 4,0, // T,U + 0,0,0,0,0}; // v,w,x,y,z + -//#pragma GCC visibility push(hidden) typedef struct CNNParams_st { -//public: float Ct1; float Ct2; float rlogc; @@ -75,30 +52,19 @@ typedef struct CNNParams_st float kfac; int saltMethod; float gcContent; - float new_TM; // ge‰ndert von ML!!! - //CNNParams(); - //virtual ~CNNParams(); -//private: + float new_TM; float dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign) float dS[6][6][6][6]; }CNNParams, * PNNParams; - //void nparam_InitParams(PNNParams nparm, float c1=0.000001f, float c2=0.000001f, float kp=1, int sm = SALT_METHOD_SANTALUCIA ); - void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm); - void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2); - float nparam_CountGCContent(char * seq ); - char nparam_convertNum(char c); - float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1); - float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1); - float nparam_CalcTM(float entropy,float enthalpy); - void nparam_AlterTM(PNNParams nparm, float tm_new); - float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy); - float nparam_CalcSelfTM(PNNParams nparm, char* seq); - float nparam_GetInitialEntropy(PNNParams nparm) ; - char nparam_getComplement(char mychar, int asnum); - int nparam_isMismatch(char,char); +void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm); +int nparam_CountGCContent(char * seq ); +float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1); +float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1); +float nparam_CalcTM(float entropy,float enthalpy); +float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len); +float nparam_GetInitialEntropy(PNNParams nparm) ; +float calculateMeltingTemperatureBasic (char * seq); +//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options); - - -//#pragma GCC visibility pop -#endif // !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_) +#endif