From f7e25b2082ebc83fa07c8f2d2d201c3248c54d5a Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 22 Jan 2010 09:16:53 +0000 Subject: [PATCH] New version 0.2 of ecoPCR, with Tm computation. Take care file format have change. You must use corresponding version of ecogrep You can use -t option to go back to the old format without tm computation git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@238 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- src/Makefile | 9 +- src/ecogrep.c | 4 +- src/ecopcr.c | 108 ++++++- src/global.mk | 2 +- src/libthermo/Makefile | 22 ++ src/libthermo/nnparams.c | 605 +++++++++++++++++++++++++++++++++++++++ src/libthermo/nnparams.h | 63 ++++ 7 files changed, 796 insertions(+), 17 deletions(-) create mode 100644 src/libthermo/Makefile create mode 100644 src/libthermo/nnparams.c create mode 100644 src/libthermo/nnparams.h diff --git a/src/Makefile b/src/Makefile index 6705ac3..9a0759c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -14,10 +14,11 @@ IUT_OBJ= $(patsubst %.c,%.o,$(IUT_SRC)) SRCS= $(PCR_SRC) $(FIND_SRC) $(IUT_SRC) -LIB= -lecoPCR -lapat -lz -lm +LIB= -lecoPCR -lthermo -lapat -lz -lm LIBFILE= libapat/libapat.a \ - libecoPCR/libecoPCR.a + libecoPCR/libecoPCR.a \ + libthermo/libthermo.a include global.mk @@ -81,6 +82,9 @@ libapat/libapat.a: libecoPCR/libecoPCR.a: $(MAKE) -C libecoPCR +libthermo/libthermo.a: + $(MAKE) -C libthermo + ######## # @@ -93,6 +97,7 @@ clean: rm -f $(EXEC) $(MAKE) -C libapat clean $(MAKE) -C libecoPCR clean + $(MAKE) -C libthermo clean \ No newline at end of file diff --git a/src/ecogrep.c b/src/ecogrep.c index bf32500..8d45312 100644 --- a/src/ecogrep.c +++ b/src/ecogrep.c @@ -29,11 +29,11 @@ void getLineContent(char *stream, ecoseq_t *seq, ecoseq_t *oligoseq_1, ecoseq_t oligoseq_1->SQ = strdup(buffer); oligoseq_1->SQ_length = strlen(buffer); break; - case 15: + case 16: oligoseq_2->SQ = strdup(buffer); oligoseq_2->SQ_length = strlen(buffer); break; - case 18: + case 20: seq->SQ = strdup(buffer); seq->SQ_length = strlen(buffer); break; diff --git a/src/ecopcr.c b/src/ecopcr.c index 3c0f275..d463043 100644 --- a/src/ecopcr.c +++ b/src/ecopcr.c @@ -1,11 +1,13 @@ #include "libecoPCR/ecoPCR.h" +#include "libthermo/nnparams.h" #include #include #include #include -#define VERSION "0.1" +#define VERSION "0.2" + /* ----------------------------------------------- */ /* printout help */ @@ -42,7 +44,12 @@ static void PrintHelp() PP "-r : [R]estricts the search to the given taxonomic id.\n"); PP " Taxonomy id are available using the ecofind program.\n"); PP " see its help typing ecofind -h for more information.\n"); - PP "-c : Consider that the database sequences are [c]ircular"); + PP "-c : Consider that the database sequences are [c]ircular\n"); + PP "-t : don't calculate tm (backward compatibility with old ecoPCR\n"); + PP " output file format)\n"); + PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1\n"); + PP " or OWCZARZY:2, default=1)\n\n"); + PP "-a : Salt contentration in M for Tm computation (default 0.05 M)\n\n"); PP "\n"); PP "------------------------------------------\n"); PP "first argument : oligonucleotide for direct strand\n\n"); @@ -64,11 +71,13 @@ static void PrintHelp() PP "column 13 : strand (direct or reverse)\n"); PP "column 14 : first oligonucleotide\n"); PP "column 15 : number of errors for the first strand\n"); - PP "column 16 : second oligonucleotide\n"); - PP "column 17 : number of errors for the second strand\n"); - PP "column 18 : amplification length\n"); - PP "column 19 : sequence\n"); - PP "column 20 : definition\n"); + PP "column 16 : Tm for hybridization of primer 1 at this site\n"); + PP "column 17 : second oligonucleotide\n"); + PP "column 18 : number of errors for the second strand\n"); + PP "column 19 : Tm for hybridization of primer 1 at this site\n"); + PP "column 20 : amplification length\n"); + PP "column 21 : sequence\n"); + PP "column 22 : definition\n"); PP "------------------------------------------\n"); PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n"); PP "------------------------------------------\n\n"); @@ -97,6 +106,9 @@ static void ExitUsage(stat) #undef PP void printRepeat(ecoseq_t *seq, + char* primer1, char* primer2, + char compute_tm, + PNNParams tparm, PatternPtr o1, PatternPtr o2, char strand, char kingdom, @@ -128,6 +140,8 @@ void printRepeat(ecoseq_t *seq, char *amplifia = NULL; int32_t amplength; + double tm1,tm2; + double tm=0; AC = seq->AC; seqlength = seq->SQ_length; @@ -222,7 +236,38 @@ void printRepeat(ecoseq_t *seq, ecoComplementSequence(oligo2); amplifia[amplength - o2->patlen - o1->patlen]=0; - printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %-32s | %2d | %5d | %s | %s\n", + if (compute_tm) + { + tm1=nparam_CalcTwoTM(tparm,oligo1,primer1,o1->patlen) - 273.15; + tm2=nparam_CalcTwoTM(tparm,oligo2,primer2,o2->patlen) - 273.15; + tm = (tm1 < tm2) ? tm1:tm2; + printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %5.2f | %-32s | %2d | %5.2f | %5d | %s | %s\n", + AC, + seqlength, + taxid, + rank, + species_taxid, + scientificName, + genus_taxid, + genus_name, + family_taxid, + family_name, + superkingdom_taxid, + superkingdom_name, + strand, + oligo1, + error1, + tm1, + oligo2, + error2, + tm2, + amplength - o1->patlen - o2->patlen, + amplifia, + seq->DE + ); + } + else + printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %-32s | %2d | %5d | %s | %s\n", AC, seqlength, taxid, @@ -300,8 +345,12 @@ int main(int argc, char **argv) int32_t g=0; int32_t circular=0; + char compute_tm=0; + int32_t saltmethod=SALT_METHOD_SANTALUCIA; + double salt=0.05; + CNNParams tparm; - while ((carg = getopt(argc, argv, "hcd:l:L:e:i:r:k")) != -1) { + while ((carg = getopt(argc, argv, "hcd:l:L:e:i:r:km:a:t")) != -1) { switch (carg) { /* -------------------- */ @@ -366,7 +415,25 @@ int main(int argc, char **argv) circular = 1; break; - case '?': /* bad option */ + /* --------------------------- */ + case 't': /* compute tm of amplification */ + compute_tm = 1; /* --------------------------- */ + break; + /* --------------------------------- */ + case 'm': /* set salt method */ + /* --------------------------------- */ + sscanf(optarg,"%d",&(saltmethod)); + compute_tm = 1; + break; + + /* --------------------------------- */ + case 'a': /* set salt */ + /* --------------------------------- */ + sscanf(optarg,"%lf",&(salt)); + compute_tm = 1; + break; + + case '?': /* bad option */ /* -------------------- */ errflag++; } @@ -404,6 +471,11 @@ int main(int argc, char **argv) errflag++; } + if (compute_tm) + nparam_InitParams(&tparm,DEF_CONC_PRIMERS, + DEF_CONC_PRIMERS, + salt, + saltmethod); if (!oligo1 || !oligo2) errflag++; @@ -423,6 +495,18 @@ int main(int argc, char **argv) printf("# reverse strand oligo2 : %-32s ; oligo1c : %32s\n", o2->cpat,o1c->cpat); printf("# max error count by oligonucleotide : %d\n",error_max); + if (compute_tm) + { + double tm,tm1,tm2; + + tm1=nparam_CalcSelfTM(&tparm,o1->cpat,o1->patlen) - 273.15; + tm2=nparam_CalcSelfTM(&tparm,o2->cpat,o2->patlen) - 273.15; + tm = (tm1 < tm2) ? tm1:tm2; + + printf("# optimal Tm for primers 1 : %5.2f\n",tm1); + printf("# optimal Tm for primers 2 : %5.2f\n",tm2); + } + printf("# database : %s\n",prefix); if (lmin && lmax) printf("# amplifiat length between [%d,%d] bp\n",lmin,lmax); @@ -523,7 +607,7 @@ int main(int argc, char **argv) if (length && (!lmin || (length >= lmin)) && (!lmax || (length <= lmax))) - printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy); + printRepeat(seq,oligo1,oligo2,compute_tm,&tparm,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy); //printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname); } @@ -577,7 +661,7 @@ int main(int argc, char **argv) if (length && (!lmin || (length >= lmin)) && (!lmax || (length <= lmax))) - printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy); + printRepeat(seq,oligo1,oligo2,compute_tm,&tparm,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy); //printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname); } } diff --git a/src/global.mk b/src/global.mk index 44efeca..6b1f018 100644 --- a/src/global.mk +++ b/src/global.mk @@ -1,5 +1,5 @@ MACHINE=MAC_OS_X -LIBPATH= -Llibapat -LlibecoPCR +LIBPATH= -Llibapat -LlibecoPCR -Llibthermo MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $< CC=gcc diff --git a/src/libthermo/Makefile b/src/libthermo/Makefile new file mode 100644 index 0000000..e038b92 --- /dev/null +++ b/src/libthermo/Makefile @@ -0,0 +1,22 @@ + +SOURCES = nnparams.c + +SRCS=$(SOURCES) + +OBJECTS= $(patsubst %.c,%.o,$(SOURCES)) + +LIBFILE= libthermo.a +RANLIB= ranlib + + +include ../global.mk + + +all: $(LIBFILE) + +clean: + rm -rf $(OBJECTS) $(LIBFILE) + +$(LIBFILE): $(OBJECTS) + ar -cr $@ $? + $(RANLIB) $@ diff --git a/src/libthermo/nnparams.c b/src/libthermo/nnparams.c new file mode 100644 index 0000000..20d175f --- /dev/null +++ b/src/libthermo/nnparams.c @@ -0,0 +1,605 @@ +/* + * nnparams.cpp + * PHunterLib + * + * Nearest Neighbor Model / Parameters + * + * Created by Tiayyba Riaz on 7/2/09. + * + */ + +#include +#include +#include +#include +#include"nnparams.h" + +static char bpencoder[] = { 1, // A + 0, // b + 2, // C + 0,0,0, // d, e, f + 3, // G + 0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s + 4,0, // T,U + 0,0,0,0,0}; // v,w,x,y,z + + +double forbidden_entropy; + + +double nparam_GetInitialEntropy(PNNParams nparm) +{ + return -5.9f+nparm->rlogc; +} + + +//Retrieve Enthalpy for given NN-Pair from parameter table +double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1) +{ + return ndH(x0,x1,y0,y1); //xx, yx are already numbers +} + + +//Retrieve Entropy for given NN-Pair from parameter table +double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1) +{ + //xx and yx are already numbers + char nx0=x0;//nparam_convertNum(x0); + char nx1=x1;//nparam_convertNum(x1); + char ny0=y0;//nparam_convertNum(y0); + char ny1=y1;//nparam_convertNum(y1); + double answer = ndS(nx0,nx1,ny0,ny1); + /*Salt correction Santalucia*/ + if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) { + if(nx0!=5 && 1<= nx1 && nx1<=4) { + answer += 0.5*nparm->kfac; + } + if(ny1!=5 && 1<= ny0 && ny0<=4) { + answer += 0.5*nparm->kfac; + } + } + /*Salt correction Owczarzy*/ + if (nparm->saltMethod == SALT_METHOD_OWCZARZY) { + double logk = log(nparm->kplus); + answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk); + } + return answer; +} + +/* PURPOSE: Return melting temperature TM for given entropy and enthalpy +* Assuming a one-state transition and using the formula +* TM = dH / (dS + R ln(Ct/4)) +* entropy = dS + R ln Ct/4 (must already be included!) +* enthaklpy = dH +* where +* dH = enthalpy +* dS = entropy +* R = Boltzmann factor +* Ct = Strand Concentration +* +* PARAMETERS: +* entrypy and enthalpy +* +* RETURN VALUE: +* temperature +*/ + +double nparam_CalcTM(double entropy,double enthalpy) +{ + double tm = 0; // absolute zero - return if model fails! + if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact)) + return 0; + if (entropy<0) // avoid division by zero and model errors! + { + tm = enthalpy/entropy;// - kfac; //LKFEB + if (tm<0) + return 0; + } + return tm; +} + + +void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm) +{ + nparm->Ct1 = c1; + nparm->Ct2 = c2; + nparm->kplus = kp; + int maxCT = 1; + if(nparm->Ct2 > nparm->Ct1) + { + maxCT = 2; + } + double ctFactor; + if(nparm->Ct1 == nparm->Ct2) + { + ctFactor = nparm->Ct1/2; + } + else if (maxCT == 1) + { + ctFactor = nparm->Ct1-nparm->Ct2/2; + } + else + { + ctFactor = nparm->Ct2-nparm->Ct1/2; + } + nparm->rlogc = R * log(ctFactor); + forbidden_entropy = nparm->rlogc; + nparm->kfac = 0.368 * log (nparm->kplus); + nparm->saltMethod = sm; + int x,y,a,b; // variables used as counters... + + // Set all parameters to zero! + memset(nparm->dH,0,sizeof(nparm->dH)); + memset(nparm->dS,0,sizeof(nparm->dS)); + + // Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small! + for (x=1;x<=4;x++) + { + for (y=1;y<=4;y++) + { + ndH(0,x,y,0)=forbidden_enthalpy; + ndS(0,x,y,0)=forbidden_entropy; + ndH(x,0,0,y)=forbidden_enthalpy; + ndS(x,0,0,y)=forbidden_entropy; + ndH(x,0,y,0)=forbidden_enthalpy; + ndS(x,0,y,0)=forbidden_entropy; + // forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap! + ndH(x,5,y,0)=forbidden_enthalpy; + ndS(x,5,y,0)=forbidden_entropy; + ndH(x,0,y,5)=forbidden_enthalpy; + ndS(x,0,y,5)=forbidden_entropy; + ndH(5,x,0,y)=forbidden_enthalpy; + ndS(5,x,0,y)=forbidden_entropy; + ndH(0,x,5,y)=forbidden_enthalpy; + ndS(0,x,5,y)=forbidden_entropy; + // forbid X$/-Y etc. + ndH(x,5,0,y)=forbidden_enthalpy; + ndS(x,5,0,y)=forbidden_entropy; + ndH(x,0,5,y)=forbidden_enthalpy; + ndS(x,0,5,y)=forbidden_entropy; + ndH(5,x,y,0)=forbidden_enthalpy; + ndS(5,x,y,0)=forbidden_entropy; + ndH(0,x,y,5)=forbidden_enthalpy; + ndS(0,x,y,5)=forbidden_entropy; + + } + // also, forbid x-/-- and --/x-, i.e. no two inner gaps paired + ndH(x,0,0,0)=forbidden_enthalpy; + ndS(x,0,0,0)=forbidden_entropy; + ndH(0,0,x,0)=forbidden_enthalpy; + ndS(0,0,x,0)=forbidden_entropy; + // x-/-$ + ndH(x,0,0,5)=forbidden_enthalpy; + ndS(x,0,0,5)=forbidden_entropy; + ndH(5,0,0,x)=forbidden_enthalpy; + ndS(5,0,0,x)=forbidden_entropy; + ndH(0,5,x,0)=forbidden_enthalpy; + ndS(x,0,0,5)=forbidden_entropy; + ndH(0,x,5,0)=forbidden_enthalpy; + ndS(0,x,5,0)=forbidden_entropy; + } + // forbid --/-- + ndH(0,0,0,0)=forbidden_enthalpy; + ndS(0,0,0,0)=forbidden_entropy; + + ndH(5,0,0,0)=forbidden_enthalpy; + ndS(5,0,0,0)=forbidden_entropy; + ndH(0,0,5,0)=forbidden_enthalpy; + ndS(0,0,5,0)=forbidden_entropy; + ndH(0,5,5,0)=forbidden_enthalpy; + ndS(0,5,5,0)=forbidden_entropy; + + // Interior loops (double Mismatches) + #define iloop_entropy -0.97f + #define iloop_enthalpy 0.0f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + for (a=1; a<=4; a++) + for (b=1; b<=4; b++) + // AT and CG pair, and as A=1, C=2, G=3, T=4 this means + // we have Watson-Crick pairs if (x+a==5) and (y+b)==5. + if (!((x+a==5)||(y+b==5))) + { + // No watson-crick-pair, i.e. double mismatch! + // set enthalpy/entropy to loop expansion! + ndH(x,y,a,b) = iloop_enthalpy; + ndS(x,y,a,b) = iloop_entropy; + } + + // xy/-- and --/xy (Bulge Loops of size > 1) + #define bloop_entropy -1.3f + #define bloop_enthalpy 0.0f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + { + ndH(x,y,0,0) = bloop_enthalpy; + ndS(x,y,0,0) = bloop_entropy; + ndH(0,0,x,y) = bloop_enthalpy; + ndS(0,0,x,y) = bloop_entropy; + } + + // x-/ya abd xa/y- as well as -x/ay and ax/-y + // bulge opening and closing parameters with + // adjacent matches / mismatches + // obulge_mism and cbulge_mism chosen so high to avoid + // AAAAAAAAA + // T--G----T + // being better than + // AAAAAAAAA + // TG------T + #define obulge_match_H (-2.66f * 1000) + #define obulge_match_S -14.22f + #define cbulge_match_H (-2.66f * 1000) + #define cbulge_match_S -14.22f + #define obulge_mism_H (0.0f * 1000) + #define obulge_mism_S -6.45f + #define cbulge_mism_H 0.0f + #define cbulge_mism_S -6.45f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + for (a=1; a<=4; a++) + { + if (x+y==5) // other base pair matches! + { + ndH(x,0,y,a)=obulge_match_H; // bulge opening + ndS(x,0,y,a)=obulge_match_S; + ndH(x,a,y,0)=obulge_match_H; + ndS(x,a,y,0)=obulge_match_S; + ndH(0,x,a,y)=cbulge_match_H; // bulge closing + ndS(0,x,a,y)=cbulge_match_S; + ndH(a,x,0,y)=cbulge_match_H; + ndS(a,x,0,y)=cbulge_match_S; + } + else + { // mismatch in other base pair! + ndH(x,0,y,a)=obulge_mism_H; // bulge opening + ndS(x,0,y,a)=obulge_mism_S; + ndH(x,a,y,0)=obulge_mism_H; + ndS(x,a,y,0)=obulge_mism_S; + ndH(0,x,a,y)=cbulge_mism_H; // bulge closing + ndS(0,x,a,y)=cbulge_mism_S; + ndH(a,x,0,y)=cbulge_mism_H; + ndS(a,x,0,y)=cbulge_mism_S; + } + } + + // Watson-Crick pairs (note that only ten are unique, as obviously + // 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc. + ndH(1,1,4,4)=-7.6f*1000; ndS(1,1,4,4)=-21.3f; // AA/TT 04 + ndH(1,2,4,3)=-8.4f*1000; ndS(1,2,4,3)=-22.4f; // AC/TG adapted GT/CA + ndH(1,3,4,2)=-7.8f*1000; ndS(1,3,4,2)=-21.0f; // AG/TC adapted CT/GA + ndH(1,4,4,1)=-7.2f*1000; ndS(1,4,4,1)=-20.4f; // AT/TA 04 + ndH(2,1,3,4)=-8.5f*1000; ndS(2,1,3,4)=-22.7f; // CA/GT 04 + ndH(2,2,3,3)=-8.0f*1000; ndS(2,2,3,3)=-19.9f; // CC/GG adapted GG/CC + ndH(2,3,3,2)=-10.6f*1000; ndS(2,3,3,2)=-27.2f; // CG/GC 04 + ndH(2,4,3,1)=-7.8f*1000; ndS(2,4,3,1)=-21.0f; // CT/GA 04 + ndH(3,1,2,4)=-8.2f*1000; ndS(3,1,2,4)=-22.2f; // GA/CT 04 + ndH(3,2,2,3)=-9.8f*1000; ndS(3,2,2,3)=-24.4f; // GC/CG 04 + ndH(3,3,2,2)=-8.0f*1000; ndS(3,3,2,2)=-19.9f; // GG/CC 04 + ndH(3,4,2,1)=-8.4f*1000; ndS(3,4,2,1)=-22.4f; // GT/CA 04 + ndH(4,1,1,4)=-7.2f*1000; ndS(4,1,1,4)=-21.3f; // TA/AT 04 + ndH(4,2,1,3)=-8.2f*1000; ndS(4,2,1,3)=-22.2f; // TC/AG adapted GA/CT + ndH(4,3,1,2)=-8.5f*1000; ndS(4,3,1,2)=-22.7f; // TG/AC adapted CA/GT + ndH(4,4,1,1)=-7.6f*1000; ndS(4,4,1,1)=-21.3f; // TT/AA adapted AA/TT + + // A-C Mismatches (Values for pH 7.0) + ndH(1,1,2,4)=7.6f*1000; ndS(1,1,2,4)=20.2f; // AA/CT + ndH(1,1,4,2)=2.3f*1000; ndS(1,1,4,2)=4.6f; // AA/TC + ndH(1,2,2,3)=-0.7f*1000; ndS(1,2,2,3)=-3.8f; // AC/CG + ndH(1,2,4,1)=5.3f*1000; ndS(1,2,4,1)=14.6f; // AC/TA + ndH(1,3,2,2)=0.6f*1000; ndS(1,3,2,2)=-0.6f; // AG/CC + ndH(1,4,2,1)=5.3f*1000; ndS(1,4,2,1)=14.6f; // AT/CA + ndH(2,1,1,4)=3.4f*1000; ndS(2,1,1,4)=8.0f; // CA/AT + ndH(2,1,3,2)=1.9f*1000; ndS(2,1,3,2)=3.7f; // CA/GC + ndH(2,2,1,3)=5.2f*1000; ndS(2,2,1,3)=14.2f; // CC/AG + ndH(2,2,3,1)=0.6f*1000; ndS(2,2,3,1)=-0.6f; // CC/GA + ndH(2,3,1,2)=1.9f*1000; ndS(2,3,1,2)=3.7f; // CG/AC + ndH(2,4,1,1)=2.3f*1000; ndS(2,4,1,1)=4.6f; // CT/AA + ndH(3,1,2,2)=5.2f*1000; ndS(3,1,2,2)=14.2f; // GA/CC + ndH(3,2,2,1)=-0.7f*1000; ndS(3,2,2,1)=-3.8f; // GC/CA + ndH(4,1,1,2)=3.4f*1000; ndS(4,1,1,2)=8.0f; // TA/AC + ndH(4,2,1,1)=7.6f*1000; ndS(4,2,1,1)=20.2f; // TC/AA + + // C-T Mismatches + ndH(1,2,4,4)=0.7f*1000; ndS(1,2,4,4)=0.2f; // AC/TT + ndH(1,4,4,2)=-1.2f*1000; ndS(1,4,4,2)=-6.2f; // AT/TC + ndH(2,1,4,4)=1.0f*1000; ndS(2,1,4,4)=0.7f; // CA/TT + ndH(2,2,3,4)=-0.8f*1000; ndS(2,2,3,4)=-4.5f; // CC/GT + ndH(2,2,4,3)=5.2f*1000; ndS(2,2,4,3)=13.5f; // CC/TG + ndH(2,3,4,2)=-1.5f*1000; ndS(2,3,4,2)=-6.1f; // CG/TC + ndH(2,4,3,2)=-1.5f*1000; ndS(2,4,3,2)=-6.1f; // CT/GC + ndH(2,4,4,1)=-1.2f*1000; ndS(2,4,4,1)=-6.2f; // CT/TA + ndH(3,2,2,4)=2.3f*1000; ndS(3,2,2,4)=5.4f; // GC/CT + ndH(3,4,2,2)=5.2f*1000; ndS(3,4,2,2)=13.5f; // GT/CC + ndH(4,1,2,4)=1.2f*1000; ndS(4,1,2,4)=0.7f; // TA/CT + ndH(4,2,2,3)=2.3f*1000; ndS(4,2,2,3)=5.4f; // TC/CG + ndH(4,2,1,4)=1.2f*1000; ndS(4,2,1,4)=0.7f; // TC/AT + ndH(4,3,2,2)=-0.8f*1000; ndS(4,3,2,2)=-4.5f; // TG/CC + ndH(4,4,2,1)=0.7f*1000; ndS(4,4,2,1)=0.2f; // TT/CA + ndH(4,4,1,2)=1.0f*1000; ndS(4,4,1,2)=0.7f; // TT/AC + + // G-A Mismatches + ndH(1,1,3,4)=3.0f*1000; ndS(1,1,3,4)=7.4f; // AA/GT + ndH(1,1,4,3)=-0.6f*1000; ndS(1,1,4,3)=-2.3f; // AA/TG + ndH(1,2,3,3)=0.5f*1000; ndS(1,2,3,3)=3.2f; // AC/GG + ndH(1,3,3,2)=-4.0f*1000; ndS(1,3,3,2)=-13.2f; // AG/GC + ndH(1,3,4,1)=-0.7f*1000; ndS(1,3,4,1)=-2.3f; // AG/TA + ndH(1,4,3,1)=-0.7f*1000; ndS(1,4,3,1)=-2.3f; // AT/GA + ndH(2,1,3,3)=-0.7f*1000; ndS(2,1,3,3)=-2.3f; // CA/GG + ndH(2,3,3,1)=-4.0f*1000; ndS(2,3,3,1)=-13.2f; // CG/GA + ndH(3,1,1,4)=0.7f*1000; ndS(3,1,1,4)=0.7f; // GA/AT + ndH(3,1,2,3)=-0.6f*1000; ndS(3,1,2,3)=-1.0f; // GA/CG + ndH(3,2,1,3)=-0.6f*1000; ndS(3,2,1,3)=-1.0f; // GC/AG + ndH(3,3,1,2)=-0.7f*1000; ndS(3,3,1,2)=-2.3f; // GG/AC + ndH(3,3,2,1)=0.5f*1000; ndS(3,3,2,1)=3.2f; // GG/CA + ndH(3,4,1,1)=-0.6f*1000; ndS(3,4,1,1)=-2.3f; // GT/AA + ndH(4,1,1,3)=0.7f*1000; ndS(4,1,1,3)=0.7f; // TA/AG + ndH(4,3,1,1)=3.0f*1000; ndS(4,3,1,1)=7.4f; // TG/AA + + // G-T Mismatches + ndH(1,3,4,4)=1.0f*1000; ndS(1,3,4,4)=0.9f; // AG/TT + ndH(1,4,4,3)=-2.5f*1000; ndS(1,4,4,3)=-8.3f; // AT/TG + ndH(2,3,3,4)=-4.1f*1000; ndS(2,3,3,4)=-11.7f; // CG/GT + ndH(2,4,3,3)=-2.8f*1000; ndS(2,4,3,3)=-8.0f; // CT/GG + ndH(3,1,4,4)=-1.3f*1000; ndS(3,1,4,4)=-5.3f; // GA/TT + ndH(3,2,4,3)=-4.4f*1000; ndS(3,2,4,3)=-12.3f; // GC/TG + ndH(3,3,2,4)=3.3f*1000; ndS(3,3,2,4)=10.4f; // GG/CT + ndH(3,3,4,2)=-2.8f*1000; ndS(3,3,4,2)=-8.0f; // GG/TC +// ndH(3,3,4,4)=5.8f*1000; ndS(3,3,4,4)=16.3f; // GG/TT + ndH(3,4,2,3)=-4.4f*1000; ndS(3,4,2,3)=-12.3f; // GT/CG + ndH(3,4,4,1)=-2.5f*1000; ndS(3,4,4,1)=-8.3f; // GT/TA +// ndH(3,4,4,3)=4.1f*1000; ndS(3,4,4,3)=9.5f; // GT/TG + ndH(4,1,3,4)=-0.1f*1000; ndS(4,1,3,4)=-1.7f; // TA/GT + ndH(4,2,3,3)=3.3f*1000; ndS(4,2,3,3)=10.4f; // TC/GG + ndH(4,3,1,4)=-0.1f*1000; ndS(4,3,1,4)=-1.7f; // TG/AT + ndH(4,3,3,2)=-4.1f*1000; ndS(4,3,3,2)=-11.7f; // TG/GC +// ndH(4,3,3,4)=-1.4f*1000; ndS(4,3,3,4)=-6.2f; // TG/GT + ndH(4,4,1,3)=-1.3f*1000; ndS(4,4,1,3)=-5.3f; // TT/AG + ndH(4,4,3,1)=1.0f*1000; ndS(4,4,3,1)=0.9f; // TT/GA +// ndH(4,4,3,3)=5.8f*1000; ndS(4,4,3,3)=16.3f; // TT/GG + + // A-A Mismatches + ndH(1,1,1,4)=4.7f*1000; ndS(1,1,1,4)=12.9f; // AA/AT + ndH(1,1,4,1)=1.2f*1000; ndS(1,1,4,1)=1.7f; // AA/TA + ndH(1,2,1,3)=-2.9f*1000; ndS(1,2,1,3)=-9.8f; // AC/AG + ndH(1,3,1,2)=-0.9f*1000; ndS(1,3,1,2)=-4.2f; // AG/AC + ndH(1,4,1,1)=1.2f*1000; ndS(1,4,1,1)=1.7f; // AT/AA + ndH(2,1,3,1)=-0.9f*1000; ndS(2,1,3,1)=-4.2f; // CA/GA + ndH(3,1,2,1)=-2.9f*1000; ndS(3,1,2,1)=-9.8f; // GA/CA + ndH(4,1,1,1)=4.7f*1000; ndS(4,1,1,1)=12.9f; // TA/AA + + // C-C Mismatches + ndH(1,2,4,2)=0.0f*1000; ndS(1,2,4,2)=-4.4f; // AC/TC + ndH(2,1,2,4)=6.1f*1000; ndS(2,1,2,4)=16.4f; // CA/CT + ndH(2,2,2,3)=3.6f*1000; ndS(2,2,2,3)=8.9f; // CC/CG + ndH(2,2,3,2)=-1.5f*1000; ndS(2,2,3,2)=-7.2f; // CC/GC + ndH(2,3,2,2)=-1.5f*1000; ndS(2,3,2,2)=-7.2f; // CG/CC + ndH(2,4,2,1)=0.0f*1000; ndS(2,4,2,1)=-4.4f; // CT/CA + ndH(3,2,2,2)=3.6f*1000; ndS(3,2,2,2)=8.9f; // GC/CC + ndH(4,2,1,2)=6.1f*1000; ndS(4,2,1,2)=16.4f; // TC/AC + + // G-G Mismatches + ndH(1,3,4,3)=-3.1f*1000; ndS(1,3,4,3)=-9.5f; // AG/TG + ndH(2,3,3,3)=-4.9f*1000; ndS(2,3,3,3)=-15.3f; // CG/GG + ndH(3,1,3,4)=1.6f*1000; ndS(3,1,3,4)=3.6f; // GA/GT + ndH(3,2,3,3)=-6.0f*1000; ndS(3,2,3,3)=-15.8f; // GC/GG + ndH(3,3,2,3)=-6.0f*1000; ndS(3,3,2,3)=-15.8f; // GG/CG + ndH(3,3,3,2)=-4.9f*1000; ndS(3,3,3,2)=-15.3f; // GG/GC + ndH(3,4,3,1)=-3.1f*1000; ndS(3,4,3,1)=-9.5f; // GT/GA + ndH(4,3,1,3)=1.6f*1000; ndS(4,3,1,3)=3.6f; // TG/AG + + // T-T Mismatches + ndH(1,4,4,4)=-2.7f*1000; ndS(1,4,4,4)=-10.8f; // AT/TT + ndH(2,4,3,4)=-5.0f*1000; ndS(2,4,3,4)=-15.8f; // CT/GT + ndH(3,4,2,4)=-2.2f*1000; ndS(3,4,2,4)=-8.4f; // GT/CT + ndH(4,1,4,4)=0.2f*1000; ndS(4,1,4,4)=-1.5f; // TA/TT + ndH(4,2,4,3)=-2.2f*1000; ndS(4,2,4,3)=-8.4f; // TC/TG + ndH(4,3,4,2)=-5.0f*1000; ndS(4,3,4,2)=-15.8f; // TG/TC + ndH(4,4,1,4)=0.2f*1000; ndS(4,4,1,4)=-1.5f; // TT/AT + ndH(4,4,4,1)=-2.7f*1000; ndS(4,4,4,1)=-10.8f; // TT/TA + + // Dangling Ends + ndH(5,1,1,4)=-0.7f*1000; ndS(5,1,1,4)=-0.8f; // $A/AT + ndH(5,1,2,4)=4.4f*1000; ndS(5,1,2,4)=14.9f; // $A/CT + ndH(5,1,3,4)=-1.6f*1000; ndS(5,1,3,4)=-3.6f; // $A/GT + ndH(5,1,4,4)=2.9f*1000; ndS(5,1,4,4)=10.4f; // $A/TT + ndH(5,2,1,3)=-2.1f*1000; ndS(5,2,1,3)=-3.9f; // $C/AG + ndH(5,2,2,3)=-0.2f*1000; ndS(5,2,2,3)=-0.1f; // $C/CG + ndH(5,2,3,3)=-3.9f*1000; ndS(5,2,3,3)=-11.2f; // $C/GG + ndH(5,2,4,3)=-4.4f*1000; ndS(5,2,4,3)=-13.1f; // $C/TG + ndH(5,3,1,2)=-5.9f*1000; ndS(5,3,1,2)=-16.5f; // $G/AC + ndH(5,3,2,2)=-2.6f*1000; ndS(5,3,2,2)=-7.4f; // $G/CC + ndH(5,3,3,2)=-3.2f*1000; ndS(5,3,3,2)=-10.4f; // $G/GC + ndH(5,3,4,2)=-5.2f*1000; ndS(5,3,4,2)=-15.0f; // $G/TC + ndH(5,4,1,1)=-0.5f*1000; ndS(5,4,1,1)=-1.1f; // $T/AA + ndH(5,4,2,1)=4.7f*1000; ndS(5,4,2,1)=14.2f; // $T/CA + ndH(5,4,3,1)=-4.1f*1000; ndS(5,4,3,1)=-13.1f; // $T/GA + ndH(5,4,4,1)=-3.8f*1000; ndS(5,4,4,1)=-12.6f; // $T/TA + ndH(1,5,4,1)=-2.9f*1000; ndS(1,5,4,1)=-7.6f; // A$/TA + ndH(1,5,4,2)=-4.1f*1000; ndS(1,5,4,2)=-13.0f; // A$/TC + ndH(1,5,4,3)=-4.2f*1000; ndS(1,5,4,3)=-15.0f; // A$/TG + ndH(1,5,4,4)=-0.2f*1000; ndS(1,5,4,4)=-0.5f; // A$/TT + ndH(1,1,5,4)=0.2f*1000; ndS(1,1,5,4)=2.3f; // AA/$T + ndH(1,1,4,5)=-0.5f*1000; ndS(1,1,4,5)=-1.1f; // AA/T$ + ndH(1,2,5,3)=-6.3f*1000; ndS(1,2,5,3)=-17.1f; // AC/$G + ndH(1,2,4,5)=4.7f*1000; ndS(1,2,4,5)=14.2f; // AC/T$ + ndH(1,3,5,2)=-3.7f*1000; ndS(1,3,5,2)=-10.0f; // AG/$C + ndH(1,3,4,5)=-4.1f*1000; ndS(1,3,4,5)=-13.1f; // AG/T$ + ndH(1,4,5,1)=-2.9f*1000; ndS(1,4,5,1)=-7.6f; // AT/$A + ndH(1,4,4,5)=-3.8f*1000; ndS(1,4,4,5)=-12.6f; // AT/T$ + ndH(2,5,3,1)=-3.7f*1000; ndS(2,5,3,1)=-10.0f; // C$/GA + ndH(2,5,3,2)=-4.0f*1000; ndS(2,5,3,2)=-11.9f; // C$/GC + ndH(2,5,3,3)=-3.9f*1000; ndS(2,5,3,3)=-10.9f; // C$/GG + ndH(2,5,3,4)=-4.9f*1000; ndS(2,5,3,4)=-13.8f; // C$/GT + ndH(2,1,5,4)=0.6f*1000; ndS(2,1,5,4)=3.3f; // CA/$T + ndH(2,1,3,5)=-5.9f*1000; ndS(2,1,3,5)=-16.5f; // CA/G$ + ndH(2,2,5,3)=-4.4f*1000; ndS(2,2,5,3)=-12.6f; // CC/$G + ndH(2,2,3,5)=-2.6f*1000; ndS(2,2,3,5)=-7.4f; // CC/G$ + ndH(2,3,5,2)=-4.0f*1000; ndS(2,3,5,2)=-11.9f; // CG/$C + ndH(2,3,3,5)=-3.2f*1000; ndS(2,3,3,5)=-10.4f; // CG/G$ + ndH(2,4,5,1)=-4.1f*1000; ndS(2,4,5,1)=-13.0f; // CT/$A + ndH(2,4,3,5)=-5.2f*1000; ndS(2,4,3,5)=-15.0f; // CT/G$ + ndH(3,5,2,1)=-6.3f*1000; ndS(3,5,2,1)=-17.1f; // G$/CA + ndH(3,5,2,2)=-4.4f*1000; ndS(3,5,2,2)=-12.6f; // G$/CC + ndH(3,5,2,3)=-5.1f*1000; ndS(3,5,2,3)=-14.0f; // G$/CG + ndH(3,5,2,4)=-4.0f*1000; ndS(3,5,2,4)=-10.9f; // G$/CT + ndH(3,1,5,4)=-1.1f*1000; ndS(3,1,5,4)=-1.6f; // GA/$T + ndH(3,1,2,5)=-2.1f*1000; ndS(3,1,2,5)=-3.9f; // GA/C$ + ndH(3,2,5,3)=-5.1f*1000; ndS(3,2,5,3)=-14.0f; // GC/$G + ndH(3,2,2,5)=-0.2f*1000; ndS(3,2,2,5)=-0.1f; // GC/C$ + ndH(3,3,5,2)=-3.9f*1000; ndS(3,3,5,2)=-10.9f; // GG/$C + ndH(3,3,2,5)=-3.9f*1000; ndS(3,3,2,5)=-11.2f; // GG/C$ + ndH(3,4,5,1)=-4.2f*1000; ndS(3,4,5,1)=-15.0f; // GT/$A + ndH(3,4,2,5)=-4.4f*1000; ndS(3,4,2,5)=-13.1f; // GT/C$ + ndH(4,5,1,1)=0.2f*1000; ndS(4,5,1,1)=2.3f; // T$/AA + ndH(4,5,1,2)=0.6f*1000; ndS(4,5,1,2)=3.3f; // T$/AC + ndH(4,5,1,3)=-1.1f*1000; ndS(4,5,1,3)=-1.6f; // T$/AG + ndH(4,5,1,4)=-6.9f*1000; ndS(4,5,1,4)=-20.0f; // T$/AT + ndH(4,1,5,4)=-6.9f*1000; ndS(4,1,5,4)=-20.0f; // TA/$T + ndH(4,1,1,5)=-0.7f*1000; ndS(4,1,1,5)=-0.7f; // TA/A$ + ndH(4,2,5,3)=-4.0f*1000; ndS(4,2,5,3)=-10.9f; // TC/$G + ndH(4,2,1,5)=4.4f*1000; ndS(4,2,1,5)=14.9f; // TC/A$ + ndH(4,3,5,2)=-4.9f*1000; ndS(4,3,5,2)=-13.8f; // TG/$C + ndH(4,3,1,5)=-1.6f*1000; ndS(4,3,1,5)=-3.6f; // TG/A$ + ndH(4,4,5,1)=-0.2f*1000; ndS(4,4,5,1)=-0.5f; // TT/$A + ndH(4,4,1,5)=2.9f*1000; ndS(4,4,1,5)=10.4f; // TT/A$ + + return; +} + +int nparam_CountGCContent(char * seq ) { + int lseq = strlen(seq); + int k; + double count = 0; + for( k=0;krlogc; + double mtemp; + char c1; + char c2; + char c3; + char c4; + unsigned int i; + char nseq[50]; + char *useq = seq; + + nparam_CleanSeq (seq, nseq, len); + useq = nseq; + + for ( i=1;idH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2); + thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2); + } + //printf("------------------\n"); + mtemp = nparam_CalcTM(thedS,thedH); + //fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s rloc=%f\n", thedH, thedS, useq, nparm->rlogc); + //exit (0); + return mtemp; +} + +double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len) +{ + double thedH = 0; + //double thedS = nparam_GetInitialEntropy(nparm); + double thedS = -5.9f+nparm->rlogc; + double mtemp; + char c1; + char c2; + char c3; + char c4; + unsigned int i; + char nseq1[50]; + char nseq2[50]; + char *useq1; + char *useq2; + + nparam_CleanSeq (seq1, nseq1, len); + useq1 = nseq1; + + nparam_CleanSeq (seq2, nseq2, len); + useq2 = nseq2; + + //fprintf (stderr,"Primer : %s\n",useq); + for ( i=1;idH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2); + thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2); + } + //fprintf(stderr,"------------------\n"); + mtemp = nparam_CalcTM(thedS,thedH); + //if (mtemp == 0) + //{ + // fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq); + //exit (0); + //} + return mtemp; +} + +double calculateMeltingTemperatureBasic (char * seq) { + int gccount; + double temp; + int seqlen; + + seqlen = strlen (seq); + gccount = nparam_CountGCContent (seq); + temp = 64.9 + 41*(gccount - 16.4)/seqlen; + return temp; +} diff --git a/src/libthermo/nnparams.h b/src/libthermo/nnparams.h new file mode 100644 index 0000000..7934858 --- /dev/null +++ b/src/libthermo/nnparams.h @@ -0,0 +1,63 @@ +/* + * nnparams.h + * PHunterLib + * + * Nearest Neighbor Model Parameters + * + * Created by Tiayyba Riaz on 02/07/09. + * + */ + +#ifndef NNPARAMS_H_ +#define NNPARAMS_H_ + +#include +#include +//#include "../libecoprimer/ecoprimer.h" + +// following defines to simplify coding... +#define ndH(a,b,c,d) nparm->dH[(int)a][(int)b][(int)c][(int)d] +#define ndS(a,b,c,d) nparm->dS[(int)a][(int)b][(int)c][(int)d] +#define forbidden_enthalpy 1000000000000000000.0f +#define R 1.987f +#define SALT_METHOD_SANTALUCIA 1 +#define SALT_METHOD_OWCZARZY 2 + +#define DEF_CONC_PRIMERS 0.0000008 +#define DEF_CONC_SEQUENCES 0 +#define DEF_SALT 0.05 + +#define GETNUMCODE(a) bpencoder[a - 'A'] +#define GETREVCODE(a) 5-bpencoder[a - 'A'] + + +extern double forbidden_entropy; + + +typedef struct CNNParams_st +{ + double Ct1; + double Ct2; + double rlogc; + double kplus; + double kfac; + int saltMethod; + double gcContent; + double new_TM; + double dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign) + double dS[6][6][6][6]; +}CNNParams, * PNNParams; + +void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm); +int nparam_CountGCContent(char * seq ); +double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1); +double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1); +double nparam_CalcTM(double entropy,double enthalpy); +double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len); +double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len); + +double nparam_GetInitialEntropy(PNNParams nparm) ; +double calculateMeltingTemperatureBasic (char * seq); +//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options); + +#endif