commit -m "Cleaned code for thermodynamics properties and added Melting Temperature for approximate version of strict repeats. Also cleaned printing code.

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@223 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2009-07-13 09:26:19 +00:00
parent 91753ace82
commit e79738e170
8 changed files with 305 additions and 564 deletions

View File

@ -6,7 +6,6 @@
*/
#include "libecoprimer/ecoprimer.h"
#include "libthermo/libthermo.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
@ -15,6 +14,9 @@
#include <time.h>
#include <sys/time.h>
#include <dlfcn.h>
#include"libthermo/nnparams.h"
#include"libthermo/thermostats.h"
#define VERSION "0.3"
/* TR: by default, statistics are made on species level*/
@ -153,37 +155,10 @@ void initoptions(poptions_t options)
options->r=0;
options->g=0;
options->no_multi_match=FALSE;
options->pnparm = NULL;
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
}
void printcurrenttime ()
{
time_t now;
struct tm *ts;
char buf[80];
/* Get the current time */
now = time(NULL);
/* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */
ts = localtime(&now);
strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts);
fprintf(stderr,"#%d#, %s\n",(int)now, buf);
}
void printcurrenttimeinmilli()
{
struct timeval tv;
struct timezone tz;
struct tm *tm;
gettimeofday(&tv, &tz);
tm=localtime(&tv.tv_sec);
fprintf(stderr, " %d:%02d:%02d %d %d \n", tm->tm_hour, tm->tm_min,
tm->tm_sec, tv.tv_usec, tv.tv_usec/1000);
}
/*TR: Added*/
void printapair(int32_t index,ppair_t pair, poptions_t options)
{
bool_t asdirect1=pair->asdirect1;
@ -198,6 +173,9 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
bool_t strand;
uint32_t i;
float temp;
CNNParams nnparams;
//nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
char *c;
char p1[32];
@ -225,6 +203,7 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
good2=goodtmp;
}
//print serial number
printf("%6d\t",index);
c = ecoUnhashWord(w1,options->primer_length);
@ -232,57 +211,67 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
c = ecoUnhashWord(w2,options->primer_length);
strcpy (p2, c);
//print primer1
printf("%s\t", p1);
//print primer2
printf("%s", p2);
/*For thermo: first word should be 5'-3'*/
/*if (!asdirect1)
{
wtmp = ecoComplementWord(w1,options->primer_length);
c = ecoUnhashWord(wtmp,options->primer_length);
strcpy (p1, c);
}*/
/*For thermo: first word should be 5'-3'*/
/*if (asdirect2)
{
wtmp = ecoComplementWord(w2,options->primer_length);
c = ecoUnhashWord(wtmp,options->primer_length);
strcpy (p2, c);
}*/
//print primer1 melting temperature
printf ("\t%4.3f", pair->p1temp);
/*temp = calculateMeltingTemperature (p1, p2);*/
//float temp = calculateMeltingTemperature ("CTGTTTACCAAAAACATC", "GGTCTGAACTCAGATCAC");
temp = calculateMeltingTemperatureBasic(p1);
printf ("\t%4.3f", temp);
temp = calculateMeltingTemperatureBasic(p2);
printf ("\t%4.3f", temp);
printf ("\t%d",countGCContent(p1));
printf ("\t%d",countGCContent(p2));
//print minimum melting temperature of approximate versions of primer1
printf ("\t%4.3f", pair->p1mintemp);
//print primer2 melting temperature
printf ("\t%4.3f", pair->p2temp);
//print minimum melting temperature of approximate versions of primer2
printf ("\t%4.3f", pair->p2mintemp);
//print gc contents of primer1
printf ("\t%d",nparam_CountGCContent(p1));
//print gc contents of primer2
printf ("\t%d",nparam_CountGCContent(p2));
//print good/bad pair indicator
printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]);
//print inexample count
printf("\t%d", pair->inexample);
//print out example count
printf("\t%d", pair->outexample);
//print yule
printf("\t%4.3f", pair->yule);
//print in taxa count
printf("\t%d", pair->intaxa);
//print out taxa count
printf("\t%d", pair->outtaxa);
//print coverage
printf("\t%4.3f", (float)pair->bc);
//print well identified taxa count
printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa);
//print specificity
printf("\t%4.3f", pair->bs);
//print min amplifia lenght
printf("\t%d", pair->mind);
//print max amplifia lenght
printf("\t%d", pair->maxd);
//print average amplifia lenght
printf("\t%3.2f", (float)pair->sumd/pair->inexample);
//print amplifia information about reference sequence if specified
if (options->refseq && pair->refsequence >=0)
{
printf("\t%s:",options->reference);
@ -351,31 +340,21 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
qfp = (float)sortedpairs[i]->outexample/options->outsamples;
else qfp=0.0;
sortedpairs[i]->quorumin = q;
sortedpairs[i]->quorumout = qfp;
sortedpairs[i]->yule = q - qfp;
sortedpairs[j]=sortedpairs[i];
if (q > options->sensitivity_quorum &&
qfp < options->false_positive_quorum)
{
(void)taxonomycoverage(sortedpairs[j],options);
taxonomyspecificity(sortedpairs[j]);
// fprintf(stderr,"%4.3f %4.3f %4.3f\n",sortedpairs[j]->yule , sortedpairs[j]->bs,sortedpairs[j]->bc);
j++;
}
}
qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs);
return j;
}
@ -410,7 +389,8 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
sortedpairs[j]=pl->pairs+i;
count=filterandsortpairs(sortedpairs,pairs->count,options);
getThermoProperties(sortedpairs, count, options);
fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count);
printf("#\n");
@ -477,7 +457,6 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
}
/*updateseqparams: This function counts the insample and outsample sequences
* and with each sequences adds a tag of the taxon to which the sequence beongs*/
@ -556,6 +535,12 @@ int main(int argc, char **argv)
//float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC");
//printf ("temp = %f\n", temp);
//return 0;
//char *t = "ACGT";
//printf ("\nGETNUMCODE: A=%d, C=%d, G=%d, T=%d\n", GETNUMCODE(t[0]), GETNUMCODE('C'), GETNUMCODE('G'), GETNUMCODE('T'));
//printf ("\nGETREVCODE: A=%d, C=%d, G=%d, T=%d\n", GETREVCODE(t[0]), GETREVCODE('C'), GETREVCODE('G'), GETREVCODE('T'));
//return 0;
CNNParams nnparams;
nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
initoptions(&options);
@ -701,7 +686,7 @@ int main(int argc, char **argv)
}
}
options.pnparm = &nnparams;
fprintf(stderr,"Reading taxonomy database ...");
taxonomy = read_taxonomy(options.prefix,0);
fprintf(stderr,"Ok\n");
@ -741,7 +726,7 @@ int main(int argc, char **argv)
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
// options.filtering=FALSE;
// words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
// fprintf(stderr,"\n Strict primer count : %d\n",words2->size);
@ -789,17 +774,10 @@ int main(int argc, char **argv)
fprintf(stderr,"\n");
/*TR: Added*/
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
//fprintf(stderr,"buildPrimerPairs finished\n");
// setoktaxforspecificity (&pairs);
printpairs (pairs, &options,taxonomy);
// closlibman ();
//ECOFREE(pairs.pairs,"Free pairs table");
return 0;
}

View File

@ -13,6 +13,7 @@
#include <stdio.h>
#include "ecotype.h"
#include "../libecoPCR/ecoPCR.h"
#include "../libthermo/nnparams.h"
#include "apat.h"
#define DEBUG
@ -190,7 +191,10 @@ typedef struct {
//
// uint32_t oktaxoncount;
uint32_t curseqid;
float p1temp; //strict primer1 melting temperature
float p1mintemp; //approx primer1 minimum melting temperature
float p2temp; //strict primer2 melting temperature
float p2mintemp; //approx primer2 minimum melting temperature
} pair_t, *ppair_t;
/*TR: Added*/
@ -274,6 +278,7 @@ typedef struct {
int32_t outsamples;
int32_t intaxa;
int32_t outtaxa;
PNNParams pnparm;
} options_t, *poptions_t;
typedef ecoseq_t **pecodnadb_t;
@ -338,5 +343,6 @@ void taxonomyspecificity (ppair_t pair);
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum);
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize);
#endif /* EPSORT_H_ */

View File

@ -156,16 +156,14 @@ ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t
{
uint32_t i;
ppairtree_t primerpairs;
primerpairs = initpairtree(NULL);
for (i=0; i < seqdbsize; i++)
{
buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options);
}
return primerpairs;
}
#define DMAX (2000000000)
@ -187,7 +185,11 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
bool_t bswp;
size_t distance;
bool_t strand;
//char prmr[50];
//float mtemp;
//prmr[options->primer_length] = '\0';
for (i=0;i < primers->size; i++)
{
matchcount+=primers->primers[i].directCount[seqid];
@ -272,10 +274,12 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
current.outexample=0;
current.curseqid = 0;
current.refsequence=-1;
//current.p1temp = 100;
//current.p1mintemp = 100;
//current.p2temp = 100;
//current.p2mintemp = 100;
// Standardize the pair
strand = current.p2->word > current.p1->word;
if (!strand)
{
@ -309,6 +313,10 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
//else
// pcurrent->outexample++;
//for each pair we save current sequence id in the pair
//when we see this pair for the first time in currnet sequence
//because we want to increment inexample & outexample count
//only once for one sequence
if (pcurrent->curseqid != (seqid+1))
{
if (seqdb[seqid]->isexample)
@ -359,6 +367,23 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
else
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;
/*strncpy (prmr, seqdb[seqid]->SQ + matches[i].position, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pcurrent->p1mintemp)
pcurrent->p1mintemp = mtemp;
//fprintf (stderr, "prmr1: %s\n", seqdb[seqid]->SQ);
strncpy (prmr, seqdb[seqid]->SQ + matches[j].position, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pcurrent->p2mintemp)
pcurrent->p2mintemp = mtemp;
//fprintf (stderr, "prmr2: %s\n", prmr);
if (pcurrent->p1temp == 100)
pcurrent->p1temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p1->word, options->primer_length), 0) - 273.0;
if (pcurrent->p2temp == 100)
pcurrent->p2temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p2->word, options->primer_length), 0) - 273.0;
*/
pcurrent->pcr.ampcount++;
// fprintf(stderr,"%c%c W1 : %s direct : %c",
// "bG"[(int)pcurrent->p1->good],

View File

@ -33,3 +33,18 @@ pecodnadb_t readdnadb(const char *name, uint32_t *size)
return db;
}
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize)
{
uint32_t i;
char ch[11];
ch [10] = '\0';
for (i=0; i < seqdbsize; i++)
{
strncpy (ch, seqdb[i]->SQ, 10);
fprintf (stderr, "seq %d = %s\n", i, ch);
}
exit (0);
}

View File

@ -1,9 +1,6 @@
SOURCES = dinkelbach.c \
galign.c \
libthermo.c \
nnparams.c \
thermalign.c
SOURCES = nnparams.c \
thermostats.c
SRCS=$(SOURCES)

Binary file not shown.

View File

@ -2,68 +2,101 @@
* nnparams.cpp
* PHunterLib
*
* Nearest Neighbor Model / Parameters
*
* Created by Tiayyba Riaz on 7/2/09.
* Copyright 2009 __MyCompanyName__. All rights reserved.
*
*/
//=============================================================================
// Module: nnparams.cpp
// Project: Diploma Thesis - Probe Selection for DNA Microarrays
// Type: implementation - Nearest Neighbor Model / Parameters
// Language: c++
// Compiler: microsoft visual c++ 6.0, unix/linux gcc
// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested)
// Database: none
// Description: class CNNParams - Nearest Neighbor Model / Parameters
// Author: kaderali
// Date: 12/2000
// Copyright: (c) L. Kaderali, 9/2000 - 12/2000
//
// Revision History
// $ 00sep07 LK : created
// 00dec17 LK : modified to use new nn parameters
// 00dec29 LK : modified to include dangling end parameters
// 01jan09 LK : included CalcSelfTM
// 01feb07 LK : optimized
// #$
//=============================================================================
#include <memory.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include"nnparams.h"
#ifdef _pack
#pragma pack(1)
#endif
float forbidden_entropy;
///////////////////////////////////////////////////////////////////////////////
// Construction/Destruction
///////////////////////////////////////////////////////////////////////////////
float nparam_GetInitialEntropy(PNNParams nparm)
{
return -5.9f+nparm->rlogc;
}
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: void CNNParams::InitParams()
//
// PURPOSE: Initialize nearest neighbor parameters. For now, simply set
// params as requiered. Extend to read from file sometime.
//
// PARAMETERS:
// none
//
// RETURN VALUE:
// void
//
// REVISION HISTORY
// $ 00sep06 : created LK
// 00dec17 : modified to use new parameters LK
// 00dec29 : included dangling end data LK
// #$
//Retrieve Enthalpy for given NN-Pair from parameter table
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
return ndH(x0,x1,y0,y1); //xx, yx are already numbers
}
//Retrieve Entropy for given NN-Pair from parameter table
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
//xx and yx are already numbers
char nx0=x0;//nparam_convertNum(x0);
char nx1=x1;//nparam_convertNum(x1);
char ny0=y0;//nparam_convertNum(y0);
char ny1=y1;//nparam_convertNum(y1);
float answer = ndS(nx0,nx1,ny0,ny1);
/*Salt correction Santalucia*/
if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
if(nx0!=5 && 1<= nx1 && nx1<=4) {
answer += 0.5*nparm->kfac;
}
if(ny1!=5 && 1<= ny0 && ny0<=4) {
answer += 0.5*nparm->kfac;
}
}
/*Salt correction Owczarzy*/
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
float logk = log(nparm->kplus);
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
}
return answer;
}
/* PURPOSE: Return melting temperature TM for given entropy and enthalpy
* Assuming a one-state transition and using the formula
* TM = dH / (dS + R ln(Ct/4))
* entropy = dS + R ln Ct/4 (must already be included!)
* enthaklpy = dH
* where
* dH = enthalpy
* dS = entropy
* R = Boltzmann factor
* Ct = Strand Concentration
*
* PARAMETERS:
* entrypy and enthalpy
*
* RETURN VALUE:
* temperature
*/
float nparam_CalcTM(float entropy,float enthalpy)
{
float tm = 0; // absolute zero - return if model fails!
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
return 0;
if (entropy<0) // avoid division by zero and model errors!
{
tm = enthalpy/entropy;// - kfac; //LKFEB
if (tm<0)
return 0;
}
return tm;
}
/* PURPOSE: Initialize nearest neighbor parameters.
*
* PARAMETERS:
* none
*
* RETURN VALUE:
* void
*/
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
{
@ -433,23 +466,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
return;
}
void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2)
{
float l1 = strlen(s1);
float l2 = strlen(s2);
if(l1<l2) {
nparm->gcContent = nparam_CountGCContent(s1);
nparm->gcContent /= (l1-2);
} else if (l1>l2) {
nparm->gcContent = nparam_CountGCContent(s2);
nparm->gcContent /= (l2-2);
} else {
nparm->gcContent = nparam_CountGCContent(s1)+nparam_CountGCContent(s2);
nparm->gcContent /= (l1+l2-4);
}
}
float nparam_CountGCContent(char * seq ) {
int nparam_CountGCContent(char * seq ) {
int lseq = strlen(seq);
int k;
float count = 0;
@ -461,352 +478,89 @@ float nparam_CountGCContent(char * seq ) {
return count;
}
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: static char CNNParams::getComplement(char mychar)
//
// PURPOSE: return Watson-Crick complement to mychar
//
// PARAMETERS:
// mychar - Character to complement
//
// RETURN VALUE:
// char - complement to mychar
//
// REVISION HISTORY
// $ 00sep28 : created LK
// #$
char nparam_getComplement(char mychar, int asnum)
void nparam_CleanSeq (char* inseq, char* outseq, int len)
{
if (mychar==1) // A -> T
return 4;
if (mychar==2) // C -> G
return 3;
if (mychar==3) // G -> T
return 2;
if (mychar==4) // T -> A
return 1;
if (mychar==5) // $ -> $
return 5;
if (!asnum)
int seqlen = strlen (inseq);
int i, j;
if (len != 0)
seqlen = len;
for (i = 0, j = 0; i < seqlen; i++)
{
if (mychar=='A') // A -> T
return 'T';
if (mychar=='C') // C -> G
return 'G';
if (mychar=='G') // G -> T
return 'C';
if (mychar=='T') // T -> A
return 'A';
if (mychar=='$') // $ -> $
return '$';
switch (inseq[i])
{
case 'a':
case '\0':
case 'A':
outseq[j++] = 'A'; break;
case 'c':
case '\1':
case 'C':
outseq[j++] = 'C'; break;
case 'g':
case '\2':
case 'G':
outseq[j++] = 'G'; break;
case 't':
case '\3':
case 'T':
outseq[j++] = 'T'; break;
}
}
else
{
if (mychar=='A') // A -> T
return 4;
if (mychar=='C') // C -> G
return 3;
if (mychar=='G') // G -> T
return 2;
if (mychar=='T') // T -> A
return 1;
if (mychar=='$') // $ -> $
return 5;
}
return '*';
outseq[j] = '\0';
}
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: static bool CNNParams::isMismatch(char,char)
//
// PURPOSE: Return true if char1 and char2 are not watson-crick pair
//
// PARAMETERS:
// char1 - first character
// char2 - second character
//
// RETURN VALUE:
// bool - true if char1,char2 are Watson-Crick, false otw
//
// REVISION HISTORY
// $ 00sep28 : created LK
// #$
int nparam_isMismatch(char c1,char c2)
//Calculate TM for given sequence against its complement
float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
{
if (c1+c2==5)
return 0;
else
return 1;
float thedH = 0;
//float thedS = nparam_GetInitialEntropy(nparm);
float thedS = -5.9f+nparm->rlogc;
float mtemp;
char c1;
char c2;
char c3;
char c4;
unsigned int i;
char nseq[50];
char *useq = seq;
nparam_CleanSeq (seq, nseq, len);
useq = nseq;
int slen = strlen(useq);
//fprintf (stderr,"Primer : %s\n",useq);
for ( i=1;i<slen;i++)
{
c1 = GETREVCODE(useq[i-1]); //nparam_getComplement(seq[i-1],1);
c2 = GETREVCODE(useq[i]); //nparam_getComplement(seq[i],1);
c3 = GETNUMCODE(useq[i-1]);
c4 = GETNUMCODE(useq[i]);
//fprintf (stderr,"Primer : %s %f %f %d %d, %d %d %f\n",useq,thedH,thedS,(int)c3,(int)c4,(int)c1,(int)c2,nparam_GetEnthalpy(nparm, c3,c4,c1,c2));
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
//fprintf(stderr,"------------------\n");
mtemp = nparam_CalcTM(thedS,thedH);
//if (mtemp == 0)
//{
// fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq);
//exit (0);
//}
return mtemp;
}
char nparam_convertNum(char c)
{
if (c=='A')
return 1;
if (c=='C')
return 2;
if (c=='G')
return 3;
if (c=='T')
return 4;
if (c=='$')
return 5;
return 0;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::GetEntropy(char,char,char,char)
//
// PURPOSE: Retrieve Entropy for given NN-Pair from parameter table
//
// PARAMETERS: x0,x1,y0,y1: Pairs to look up in form
// 5'-x0-x1-3' / 3'-y0-y1-5'
//
// RETURN VALUE:
// float: Entropy dS for given NN pair
//
// REVISION HISTORY
// $ 00sep06 : created LK
// 00dec29 : included dangling end parameters
// 01feb07 : rewritten. looks ugly now, but is FAST! inline.
// #$
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
char nx0=nparam_convertNum(x0);
char nx1=nparam_convertNum(x1);
char ny0=nparam_convertNum(y0);
char ny1=nparam_convertNum(y1);
float answer = ndS(nx0,nx1,ny0,ny1);
//Salt correction Santalucia
if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
if(nx0!=5 && 1<= nx1 && nx1<=4) {
answer += 0.5*nparm->kfac;
}
if(ny1!=5 && 1<= ny0 && ny0<=4) {
answer += 0.5*nparm->kfac;
}
}
//Salt correction Owczarzy
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
float logk = log(nparm->kplus);
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
}
return answer;
}
float calculateMeltingTemperatureBasic (char * seq) {
int gccount;
float temp;
int seqlen;
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::GetEnthalpy(char,char,char,char)
//
// PURPOSE: Retrieve Enthalpy for given NN-Pair from parameter table
//
// PARAMETERS: x0,x1,y0,y1: Pairs to look up in form
// 5'-x0-x1-3' / 3'-y0-y1-5'
//
// RETURN VALUE:
// float: Enthalpy dH for given NN pair
//
// REVISION HISTORY
// $ 00sep06 : created LK
// $ 00dec29 : included dangling end parameters
// $ 01feb07 : rewritten. looks ugly now, but is FAST! inline.
// #$
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
char nx0=nparam_convertNum(x0);
char nx1=nparam_convertNum(x1);
char ny0=nparam_convertNum(y0);
char ny1=nparam_convertNum(y1);
return ndH(nx0,nx1,ny0,ny1);
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::CalcTM(float entropy,float enthalpy)
//
// PURPOSE: Return melting temperature TM for given entropy and enthalpy
// Assuming a one-state transition and using the formula
// TM = dH / (dS + R ln(Ct/4))
// entropy = dS + R ln Ct/4 (must already be included!)
// enthaklpy = dH
// where
// dH = enthalpy
// dS = entropy
// R = Boltzmann factor
// Ct = Strand Concentration
//
// PARAMETERS:
//
// RETURN VALUE:
//
// REVISION HISTORY
// $ 00sep06 : created LK
// $ 01jan07 : modified and corrected
// $ 01feb07 : optimized!!! inline
// $ 01feb09 : changed to include r ln ct in entropy!!!
// #$
float nparam_CalcTM(float entropy,float enthalpy)
{
float tm = 0; // absolute zero - return if model fails!
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
return 0;
if (entropy<0) // avoid division by zero and model errors!
{
tm = enthalpy/entropy;// - kfac; //LKFEB
if (tm<0)
return 0;
}
return tm;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: void CNNParams::AlterTM(float)
//
// PURPOSE: TM can be altered by a new assignment
//
// PARAMETERS: tm_new: new value for TM
//
// RETURN VALUE:
//
// REVISION HISTORY
//
// #$
void nparam_AlterTM(PNNParams nparm, float tm_new)
{
nparm->new_TM = tm_new;
return;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::CalcG(float entropy, float enthalpy)
//
// PURPOSE: return free energy G for given entropy and enthalpy
// Assuming a one-state transition and using the formula
// G = dH - new_TM * dS
// dH = enthalpy
// dS = entropy
// new_TM = value for the optimal melting temperature of
// the last iteration
//
// PARAMETERS: entropy and enthalpy
//
// RETURN VALUE: free Energy value (dG) for the
//
// REVISION HISTORY
//
// #$
float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy)
{
float freeEnergy = -999999999;
if (enthalpy>=forbidden_enthalpy)
return -999999999;
if (entropy<0) // avoid division by zero and model errors!
{
entropy = entropy * -1;
enthalpy = enthalpy * -1;
freeEnergy = enthalpy - nparm->new_TM * entropy;
}
return freeEnergy;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::CalcSelfTM(char*)
//
// PURPOSE: Calculate TM for given sequence against its complement
//
// PARAMETERS:
// char* - Sequence to consider
//
// RETURN VALUE:
// float - Melting temperature in degrees Kelvin
//
// REVISION HISTORY
// $ 01jan09 LK : created
// $ 01feb07 LK : inline.
// #$
float nparam_CalcSelfTM(PNNParams nparm, char* seq)
{
float thedH = 0;
float thedS = nparam_GetInitialEntropy(nparm);
char c1;
char c2;
char c3;
char c4;
unsigned int i;
for ( i=1;i<strlen(seq);i++)
{
c1 = nparam_getComplement(seq[i-1],1);
c2 = nparam_getComplement(seq[i],1);
c3 = seq[i-1];
c4 = seq[i];
if (c3>5)
{
if (c3=='A')
c3=1;
if (c3=='C')
c3=2;
if (c3=='G')
c3=3;
if (c3=='T')
c3=4;
if (c3=='$')
c3=5;
}
if (c4>5)
{
if (c4=='A')
c4=1;
if (c4=='C')
c4=2;
if (c4=='G')
c4=3;
if (c4=='T')
c4=4;
if (c4=='$')
c4=5;
}
thedH += nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
return nparam_CalcTM(thedS,thedH);
}
float nparam_GetInitialEntropy(PNNParams nparm)
{
return -5.9f+nparm->rlogc;
}
seqlen = strlen (seq);
gccount = nparam_CountGCContent (seq);
temp = 64.9 + 41*(gccount - 16.4)/seqlen;
return temp;
}

View File

@ -2,72 +2,49 @@
* nnparams.h
* PHunterLib
*
* Created by Tiayyba Riaz on 7/2/09.
* Copyright 2009 __MyCompanyName__. All rights reserved.
* Nearest Neighbor Model Parameters
*
* Created by Tiayyba Riaz on 02/07/09.
*
*/
//=============================================================================
// Module: nnparams.h
// Project: Diploma Thesis - Probe Selection for DNA Microarrays
// Type: header file - Nearest Neighbor Parameters / Model.
// Language: c++
// Compiler: microsoft visual c++ 6.0, unix/linux gcc
// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested)
// Database: none
// Description: class CNNParams - Nearest Neighbor Model Parameters
// Author: kaderali
// Date: 9-12/2000
// Copyright: (c) L. Kaderali, 9/2000 - 12/2000
//
// Revision History
// $ 00sep07 LK : created
// 00dec29 LK : changed to include dangling end data
// 01jan09 LK : included CalcSelfTM function
// 01feb07 LK : optimized
// #$
//=============================================================================
#if !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_)
#define AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#ifndef NNPARAMS_H_
#define NNPARAMS_H_
#include <math.h>
#include <string.h>
#ifdef _pack
#pragma pack(1)
#endif
//#include "../libecoprimer/ecoprimer.h"
// following defines to simplify coding...
#define ndH(a,b,c,d) nparm->dH[a][b][c][d]
#define ndS(a,b,c,d) nparm->dS[a][b][c][d]
#define forbidden_enthalpy 1000000000000000000.0f
//#define forbidden_enthalpy_div1000 1000000000000000.0f
// forbidden entropy=-rlogc
// #define forbidden_entropy 30.205986374220235304486574573422f
// Boltzmann factor (cal/degrees C*mol)
#define R 1.987f
#define SALT_METHOD_SANTALUCIA 1
#define SALT_METHOD_OWCZARZY 2
// Strand concentration (assumption!) (M)
// #define Ct 0.000001f
// r*ln(ct/4) as required by many formulas
//#define rlogc -30.205986374220235304486574573422f
#define DEF_CONC_PRIMERS 0.0000008
#define DEF_CONC_SEQUENCES 0
#define DEF_SALT 0.05
#define GETNUMCODE(a) bpencoder[a - 'A']
#define GETREVCODE(a) 5-bpencoder[a - 'A']
extern float forbidden_entropy;
//-----------------------------------------------------------------------------
// class CNNParams
//typedef class CNNParams* PNNParams;
static char bpencoder[] = { 1, // A
0, // b
2, // C
0,0,0, // d, e, f
3, // G
0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
4,0, // T,U
0,0,0,0,0}; // v,w,x,y,z
//#pragma GCC visibility push(hidden)
typedef struct CNNParams_st
{
//public:
float Ct1;
float Ct2;
float rlogc;
@ -75,30 +52,19 @@ typedef struct CNNParams_st
float kfac;
int saltMethod;
float gcContent;
float new_TM; // ge‰ndert von ML!!!
//CNNParams();
//virtual ~CNNParams();
//private:
float new_TM;
float dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
float dS[6][6][6][6];
}CNNParams, * PNNParams;
//void nparam_InitParams(PNNParams nparm, float c1=0.000001f, float c2=0.000001f, float kp=1, int sm = SALT_METHOD_SANTALUCIA );
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm);
void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2);
float nparam_CountGCContent(char * seq );
char nparam_convertNum(char c);
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_CalcTM(float entropy,float enthalpy);
void nparam_AlterTM(PNNParams nparm, float tm_new);
float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy);
float nparam_CalcSelfTM(PNNParams nparm, char* seq);
float nparam_GetInitialEntropy(PNNParams nparm) ;
char nparam_getComplement(char mychar, int asnum);
int nparam_isMismatch(char,char);
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm);
int nparam_CountGCContent(char * seq );
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_CalcTM(float entropy,float enthalpy);
float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
float nparam_GetInitialEntropy(PNNParams nparm) ;
float calculateMeltingTemperatureBasic (char * seq);
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
//#pragma GCC visibility pop
#endif // !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_)
#endif