commit -m "Cleaned code for thermodynamics properties and added Melting Temperature for approximate version of strict repeats. Also cleaned printing code.

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@223 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2009-07-13 09:26:19 +00:00
parent 91753ace82
commit e79738e170
8 changed files with 305 additions and 564 deletions

View File

@ -6,7 +6,6 @@
*/ */
#include "libecoprimer/ecoprimer.h" #include "libecoprimer/ecoprimer.h"
#include "libthermo/libthermo.h"
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
@ -15,6 +14,9 @@
#include <time.h> #include <time.h>
#include <sys/time.h> #include <sys/time.h>
#include <dlfcn.h> #include <dlfcn.h>
#include"libthermo/nnparams.h"
#include"libthermo/thermostats.h"
#define VERSION "0.3" #define VERSION "0.3"
/* TR: by default, statistics are made on species level*/ /* TR: by default, statistics are made on species level*/
@ -153,37 +155,10 @@ void initoptions(poptions_t options)
options->r=0; options->r=0;
options->g=0; options->g=0;
options->no_multi_match=FALSE; options->no_multi_match=FALSE;
options->pnparm = NULL;
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/ strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
} }
void printcurrenttime ()
{
time_t now;
struct tm *ts;
char buf[80];
/* Get the current time */
now = time(NULL);
/* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */
ts = localtime(&now);
strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts);
fprintf(stderr,"#%d#, %s\n",(int)now, buf);
}
void printcurrenttimeinmilli()
{
struct timeval tv;
struct timezone tz;
struct tm *tm;
gettimeofday(&tv, &tz);
tm=localtime(&tv.tv_sec);
fprintf(stderr, " %d:%02d:%02d %d %d \n", tm->tm_hour, tm->tm_min,
tm->tm_sec, tv.tv_usec, tv.tv_usec/1000);
}
/*TR: Added*/
void printapair(int32_t index,ppair_t pair, poptions_t options) void printapair(int32_t index,ppair_t pair, poptions_t options)
{ {
bool_t asdirect1=pair->asdirect1; bool_t asdirect1=pair->asdirect1;
@ -198,6 +173,9 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
bool_t strand; bool_t strand;
uint32_t i; uint32_t i;
float temp; float temp;
CNNParams nnparams;
//nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
char *c; char *c;
char p1[32]; char p1[32];
@ -225,6 +203,7 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
good2=goodtmp; good2=goodtmp;
} }
//print serial number
printf("%6d\t",index); printf("%6d\t",index);
c = ecoUnhashWord(w1,options->primer_length); c = ecoUnhashWord(w1,options->primer_length);
@ -232,57 +211,67 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
c = ecoUnhashWord(w2,options->primer_length); c = ecoUnhashWord(w2,options->primer_length);
strcpy (p2, c); strcpy (p2, c);
//print primer1
printf("%s\t", p1); printf("%s\t", p1);
//print primer2
printf("%s", p2); printf("%s", p2);
/*For thermo: first word should be 5'-3'*/ //print primer1 melting temperature
/*if (!asdirect1) printf ("\t%4.3f", pair->p1temp);
{
wtmp = ecoComplementWord(w1,options->primer_length);
c = ecoUnhashWord(wtmp,options->primer_length);
strcpy (p1, c);
}*/
/*For thermo: first word should be 5'-3'*/
/*if (asdirect2)
{
wtmp = ecoComplementWord(w2,options->primer_length);
c = ecoUnhashWord(wtmp,options->primer_length);
strcpy (p2, c);
}*/
/*temp = calculateMeltingTemperature (p1, p2);*/ //print minimum melting temperature of approximate versions of primer1
//float temp = calculateMeltingTemperature ("CTGTTTACCAAAAACATC", "GGTCTGAACTCAGATCAC"); printf ("\t%4.3f", pair->p1mintemp);
temp = calculateMeltingTemperatureBasic(p1); //print primer2 melting temperature
printf ("\t%4.3f", temp); printf ("\t%4.3f", pair->p2temp);
temp = calculateMeltingTemperatureBasic(p2); //print minimum melting temperature of approximate versions of primer2
printf ("\t%4.3f", temp); printf ("\t%4.3f", pair->p2mintemp);
printf ("\t%d",countGCContent(p1)); //print gc contents of primer1
printf ("\t%d",nparam_CountGCContent(p1));
printf ("\t%d",countGCContent(p2)); //print gc contents of primer2
printf ("\t%d",nparam_CountGCContent(p2));
//print good/bad pair indicator
printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]); printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]);
//print inexample count
printf("\t%d", pair->inexample); printf("\t%d", pair->inexample);
//print out example count
printf("\t%d", pair->outexample); printf("\t%d", pair->outexample);
//print yule
printf("\t%4.3f", pair->yule); printf("\t%4.3f", pair->yule);
//print in taxa count
printf("\t%d", pair->intaxa); printf("\t%d", pair->intaxa);
//print out taxa count
printf("\t%d", pair->outtaxa); printf("\t%d", pair->outtaxa);
//print coverage
printf("\t%4.3f", (float)pair->bc); printf("\t%4.3f", (float)pair->bc);
//print well identified taxa count
printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa); printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa);
//print specificity
printf("\t%4.3f", pair->bs); printf("\t%4.3f", pair->bs);
//print min amplifia lenght
printf("\t%d", pair->mind); printf("\t%d", pair->mind);
//print max amplifia lenght
printf("\t%d", pair->maxd); printf("\t%d", pair->maxd);
//print average amplifia lenght
printf("\t%3.2f", (float)pair->sumd/pair->inexample); printf("\t%3.2f", (float)pair->sumd/pair->inexample);
//print amplifia information about reference sequence if specified
if (options->refseq && pair->refsequence >=0) if (options->refseq && pair->refsequence >=0)
{ {
printf("\t%s:",options->reference); printf("\t%s:",options->reference);
@ -351,31 +340,21 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
qfp = (float)sortedpairs[i]->outexample/options->outsamples; qfp = (float)sortedpairs[i]->outexample/options->outsamples;
else qfp=0.0; else qfp=0.0;
sortedpairs[i]->quorumin = q; sortedpairs[i]->quorumin = q;
sortedpairs[i]->quorumout = qfp; sortedpairs[i]->quorumout = qfp;
sortedpairs[i]->yule = q - qfp; sortedpairs[i]->yule = q - qfp;
sortedpairs[j]=sortedpairs[i]; sortedpairs[j]=sortedpairs[i];
if (q > options->sensitivity_quorum && if (q > options->sensitivity_quorum &&
qfp < options->false_positive_quorum) qfp < options->false_positive_quorum)
{ {
(void)taxonomycoverage(sortedpairs[j],options); (void)taxonomycoverage(sortedpairs[j],options);
taxonomyspecificity(sortedpairs[j]); taxonomyspecificity(sortedpairs[j]);
// fprintf(stderr,"%4.3f %4.3f %4.3f\n",sortedpairs[j]->yule , sortedpairs[j]->bs,sortedpairs[j]->bc);
j++; j++;
} }
} }
qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs); qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs);
return j; return j;
} }
@ -410,6 +389,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
sortedpairs[j]=pl->pairs+i; sortedpairs[j]=pl->pairs+i;
count=filterandsortpairs(sortedpairs,pairs->count,options); count=filterandsortpairs(sortedpairs,pairs->count,options);
getThermoProperties(sortedpairs, count, options);
fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count); fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count);
@ -477,7 +457,6 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
} }
/*updateseqparams: This function counts the insample and outsample sequences /*updateseqparams: This function counts the insample and outsample sequences
* and with each sequences adds a tag of the taxon to which the sequence beongs*/ * and with each sequences adds a tag of the taxon to which the sequence beongs*/
@ -556,6 +535,12 @@ int main(int argc, char **argv)
//float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC"); //float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC");
//printf ("temp = %f\n", temp); //printf ("temp = %f\n", temp);
//return 0; //return 0;
//char *t = "ACGT";
//printf ("\nGETNUMCODE: A=%d, C=%d, G=%d, T=%d\n", GETNUMCODE(t[0]), GETNUMCODE('C'), GETNUMCODE('G'), GETNUMCODE('T'));
//printf ("\nGETREVCODE: A=%d, C=%d, G=%d, T=%d\n", GETREVCODE(t[0]), GETREVCODE('C'), GETREVCODE('G'), GETREVCODE('T'));
//return 0;
CNNParams nnparams;
nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
initoptions(&options); initoptions(&options);
@ -701,7 +686,7 @@ int main(int argc, char **argv)
} }
} }
options.pnparm = &nnparams;
fprintf(stderr,"Reading taxonomy database ..."); fprintf(stderr,"Reading taxonomy database ...");
taxonomy = read_taxonomy(options.prefix,0); taxonomy = read_taxonomy(options.prefix,0);
fprintf(stderr,"Ok\n"); fprintf(stderr,"Ok\n");
@ -789,17 +774,10 @@ int main(int argc, char **argv)
fprintf(stderr,"\n"); fprintf(stderr,"\n");
/*TR: Added*/
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options); pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
//fprintf(stderr,"buildPrimerPairs finished\n");
// setoktaxforspecificity (&pairs);
printpairs (pairs, &options,taxonomy); printpairs (pairs, &options,taxonomy);
// closlibman (); // closlibman ();
//ECOFREE(pairs.pairs,"Free pairs table");
return 0; return 0;
} }

View File

@ -13,6 +13,7 @@
#include <stdio.h> #include <stdio.h>
#include "ecotype.h" #include "ecotype.h"
#include "../libecoPCR/ecoPCR.h" #include "../libecoPCR/ecoPCR.h"
#include "../libthermo/nnparams.h"
#include "apat.h" #include "apat.h"
#define DEBUG #define DEBUG
@ -190,7 +191,10 @@ typedef struct {
// //
// uint32_t oktaxoncount; // uint32_t oktaxoncount;
uint32_t curseqid; uint32_t curseqid;
float p1temp; //strict primer1 melting temperature
float p1mintemp; //approx primer1 minimum melting temperature
float p2temp; //strict primer2 melting temperature
float p2mintemp; //approx primer2 minimum melting temperature
} pair_t, *ppair_t; } pair_t, *ppair_t;
/*TR: Added*/ /*TR: Added*/
@ -274,6 +278,7 @@ typedef struct {
int32_t outsamples; int32_t outsamples;
int32_t intaxa; int32_t intaxa;
int32_t outtaxa; int32_t outtaxa;
PNNParams pnparm;
} options_t, *poptions_t; } options_t, *poptions_t;
typedef ecoseq_t **pecodnadb_t; typedef ecoseq_t **pecodnadb_t;
@ -338,5 +343,6 @@ void taxonomyspecificity (ppair_t pair);
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize, int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum); uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum);
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize);
#endif /* EPSORT_H_ */ #endif /* EPSORT_H_ */

View File

@ -163,9 +163,7 @@ ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t
{ {
buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options); buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options);
} }
return primerpairs; return primerpairs;
} }
#define DMAX (2000000000) #define DMAX (2000000000)
@ -187,6 +185,10 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
bool_t bswp; bool_t bswp;
size_t distance; size_t distance;
bool_t strand; bool_t strand;
//char prmr[50];
//float mtemp;
//prmr[options->primer_length] = '\0';
for (i=0;i < primers->size; i++) for (i=0;i < primers->size; i++)
{ {
@ -272,10 +274,12 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
current.outexample=0; current.outexample=0;
current.curseqid = 0; current.curseqid = 0;
current.refsequence=-1; current.refsequence=-1;
//current.p1temp = 100;
//current.p1mintemp = 100;
//current.p2temp = 100;
//current.p2mintemp = 100;
// Standardize the pair // Standardize the pair
strand = current.p2->word > current.p1->word; strand = current.p2->word > current.p1->word;
if (!strand) if (!strand)
{ {
@ -309,6 +313,10 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
//else //else
// pcurrent->outexample++; // pcurrent->outexample++;
//for each pair we save current sequence id in the pair
//when we see this pair for the first time in currnet sequence
//because we want to increment inexample & outexample count
//only once for one sequence
if (pcurrent->curseqid != (seqid+1)) if (pcurrent->curseqid != (seqid+1))
{ {
if (seqdb[seqid]->isexample) if (seqdb[seqid]->isexample)
@ -359,6 +367,23 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
else else
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ; pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;
/*strncpy (prmr, seqdb[seqid]->SQ + matches[i].position, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pcurrent->p1mintemp)
pcurrent->p1mintemp = mtemp;
//fprintf (stderr, "prmr1: %s\n", seqdb[seqid]->SQ);
strncpy (prmr, seqdb[seqid]->SQ + matches[j].position, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pcurrent->p2mintemp)
pcurrent->p2mintemp = mtemp;
//fprintf (stderr, "prmr2: %s\n", prmr);
if (pcurrent->p1temp == 100)
pcurrent->p1temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p1->word, options->primer_length), 0) - 273.0;
if (pcurrent->p2temp == 100)
pcurrent->p2temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p2->word, options->primer_length), 0) - 273.0;
*/
pcurrent->pcr.ampcount++; pcurrent->pcr.ampcount++;
// fprintf(stderr,"%c%c W1 : %s direct : %c", // fprintf(stderr,"%c%c W1 : %s direct : %c",
// "bG"[(int)pcurrent->p1->good], // "bG"[(int)pcurrent->p1->good],

View File

@ -33,3 +33,18 @@ pecodnadb_t readdnadb(const char *name, uint32_t *size)
return db; return db;
} }
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize)
{
uint32_t i;
char ch[11];
ch [10] = '\0';
for (i=0; i < seqdbsize; i++)
{
strncpy (ch, seqdb[i]->SQ, 10);
fprintf (stderr, "seq %d = %s\n", i, ch);
}
exit (0);
}

View File

@ -1,9 +1,6 @@
SOURCES = dinkelbach.c \ SOURCES = nnparams.c \
galign.c \ thermostats.c
libthermo.c \
nnparams.c \
thermalign.c
SRCS=$(SOURCES) SRCS=$(SOURCES)

Binary file not shown.

View File

@ -2,68 +2,101 @@
* nnparams.cpp * nnparams.cpp
* PHunterLib * PHunterLib
* *
* Nearest Neighbor Model / Parameters
*
* Created by Tiayyba Riaz on 7/2/09. * Created by Tiayyba Riaz on 7/2/09.
* Copyright 2009 __MyCompanyName__. All rights reserved.
* *
*/ */
//=============================================================================
// Module: nnparams.cpp
// Project: Diploma Thesis - Probe Selection for DNA Microarrays
// Type: implementation - Nearest Neighbor Model / Parameters
// Language: c++
// Compiler: microsoft visual c++ 6.0, unix/linux gcc
// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested)
// Database: none
// Description: class CNNParams - Nearest Neighbor Model / Parameters
// Author: kaderali
// Date: 12/2000
// Copyright: (c) L. Kaderali, 9/2000 - 12/2000
//
// Revision History
// $ 00sep07 LK : created
// 00dec17 LK : modified to use new nn parameters
// 00dec29 LK : modified to include dangling end parameters
// 01jan09 LK : included CalcSelfTM
// 01feb07 LK : optimized
// #$
//=============================================================================
#include <memory.h> #include <memory.h>
#include <math.h> #include <math.h>
#include <stdio.h>
#include <string.h> #include <string.h>
#include"nnparams.h" #include"nnparams.h"
#ifdef _pack
#pragma pack(1)
#endif
float forbidden_entropy; float forbidden_entropy;
///////////////////////////////////////////////////////////////////////////////
// Construction/Destruction float nparam_GetInitialEntropy(PNNParams nparm)
/////////////////////////////////////////////////////////////////////////////// {
return -5.9f+nparm->rlogc;
}
/////////////////////////////////////////////////////////////////////////////// //Retrieve Enthalpy for given NN-Pair from parameter table
// FUNCTION: void CNNParams::InitParams() float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
// {
// PURPOSE: Initialize nearest neighbor parameters. For now, simply set return ndH(x0,x1,y0,y1); //xx, yx are already numbers
// params as requiered. Extend to read from file sometime. }
//
// PARAMETERS:
// none //Retrieve Entropy for given NN-Pair from parameter table
// float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
// RETURN VALUE: {
// void //xx and yx are already numbers
// char nx0=x0;//nparam_convertNum(x0);
// REVISION HISTORY char nx1=x1;//nparam_convertNum(x1);
// $ 00sep06 : created LK char ny0=y0;//nparam_convertNum(y0);
// 00dec17 : modified to use new parameters LK char ny1=y1;//nparam_convertNum(y1);
// 00dec29 : included dangling end data LK float answer = ndS(nx0,nx1,ny0,ny1);
// #$ /*Salt correction Santalucia*/
if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
if(nx0!=5 && 1<= nx1 && nx1<=4) {
answer += 0.5*nparm->kfac;
}
if(ny1!=5 && 1<= ny0 && ny0<=4) {
answer += 0.5*nparm->kfac;
}
}
/*Salt correction Owczarzy*/
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
float logk = log(nparm->kplus);
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
}
return answer;
}
/* PURPOSE: Return melting temperature TM for given entropy and enthalpy
* Assuming a one-state transition and using the formula
* TM = dH / (dS + R ln(Ct/4))
* entropy = dS + R ln Ct/4 (must already be included!)
* enthaklpy = dH
* where
* dH = enthalpy
* dS = entropy
* R = Boltzmann factor
* Ct = Strand Concentration
*
* PARAMETERS:
* entrypy and enthalpy
*
* RETURN VALUE:
* temperature
*/
float nparam_CalcTM(float entropy,float enthalpy)
{
float tm = 0; // absolute zero - return if model fails!
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
return 0;
if (entropy<0) // avoid division by zero and model errors!
{
tm = enthalpy/entropy;// - kfac; //LKFEB
if (tm<0)
return 0;
}
return tm;
}
/* PURPOSE: Initialize nearest neighbor parameters.
*
* PARAMETERS:
* none
*
* RETURN VALUE:
* void
*/
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
{ {
@ -433,23 +466,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
return; return;
} }
void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2) int nparam_CountGCContent(char * seq ) {
{
float l1 = strlen(s1);
float l2 = strlen(s2);
if(l1<l2) {
nparm->gcContent = nparam_CountGCContent(s1);
nparm->gcContent /= (l1-2);
} else if (l1>l2) {
nparm->gcContent = nparam_CountGCContent(s2);
nparm->gcContent /= (l2-2);
} else {
nparm->gcContent = nparam_CountGCContent(s1)+nparam_CountGCContent(s2);
nparm->gcContent /= (l1+l2-4);
}
}
float nparam_CountGCContent(char * seq ) {
int lseq = strlen(seq); int lseq = strlen(seq);
int k; int k;
float count = 0; float count = 0;
@ -461,352 +478,89 @@ float nparam_CountGCContent(char * seq ) {
return count; return count;
} }
/////////////////////////////////////////////////////////////////////////////// void nparam_CleanSeq (char* inseq, char* outseq, int len)
// FUNCTION: static char CNNParams::getComplement(char mychar)
//
// PURPOSE: return Watson-Crick complement to mychar
//
// PARAMETERS:
// mychar - Character to complement
//
// RETURN VALUE:
// char - complement to mychar
//
// REVISION HISTORY
// $ 00sep28 : created LK
// #$
char nparam_getComplement(char mychar, int asnum)
{ {
if (mychar==1) // A -> T int seqlen = strlen (inseq);
return 4; int i, j;
if (mychar==2) // C -> G
return 3; if (len != 0)
if (mychar==3) // G -> T seqlen = len;
return 2;
if (mychar==4) // T -> A for (i = 0, j = 0; i < seqlen; i++)
return 1;
if (mychar==5) // $ -> $
return 5;
if (!asnum)
{ {
if (mychar=='A') // A -> T switch (inseq[i])
return 'T'; {
if (mychar=='C') // C -> G case 'a':
return 'G'; case '\0':
if (mychar=='G') // G -> T case 'A':
return 'C'; outseq[j++] = 'A'; break;
if (mychar=='T') // T -> A case 'c':
return 'A'; case '\1':
if (mychar=='$') // $ -> $ case 'C':
return '$'; outseq[j++] = 'C'; break;
case 'g':
case '\2':
case 'G':
outseq[j++] = 'G'; break;
case 't':
case '\3':
case 'T':
outseq[j++] = 'T'; break;
}
} }
else outseq[j] = '\0';
{
if (mychar=='A') // A -> T
return 4;
if (mychar=='C') // C -> G
return 3;
if (mychar=='G') // G -> T
return 2;
if (mychar=='T') // T -> A
return 1;
if (mychar=='$') // $ -> $
return 5;
}
return '*';
} }
//Calculate TM for given sequence against its complement
float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: static bool CNNParams::isMismatch(char,char)
//
// PURPOSE: Return true if char1 and char2 are not watson-crick pair
//
// PARAMETERS:
// char1 - first character
// char2 - second character
//
// RETURN VALUE:
// bool - true if char1,char2 are Watson-Crick, false otw
//
// REVISION HISTORY
// $ 00sep28 : created LK
// #$
int nparam_isMismatch(char c1,char c2)
{ {
if (c1+c2==5) float thedH = 0;
return 0; //float thedS = nparam_GetInitialEntropy(nparm);
else float thedS = -5.9f+nparm->rlogc;
return 1; float mtemp;
char c1;
char c2;
char c3;
char c4;
unsigned int i;
char nseq[50];
char *useq = seq;
nparam_CleanSeq (seq, nseq, len);
useq = nseq;
int slen = strlen(useq);
//fprintf (stderr,"Primer : %s\n",useq);
for ( i=1;i<slen;i++)
{
c1 = GETREVCODE(useq[i-1]); //nparam_getComplement(seq[i-1],1);
c2 = GETREVCODE(useq[i]); //nparam_getComplement(seq[i],1);
c3 = GETNUMCODE(useq[i-1]);
c4 = GETNUMCODE(useq[i]);
//fprintf (stderr,"Primer : %s %f %f %d %d, %d %d %f\n",useq,thedH,thedS,(int)c3,(int)c4,(int)c1,(int)c2,nparam_GetEnthalpy(nparm, c3,c4,c1,c2));
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
//fprintf(stderr,"------------------\n");
mtemp = nparam_CalcTM(thedS,thedH);
//if (mtemp == 0)
//{
// fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq);
//exit (0);
//}
return mtemp;
} }
float calculateMeltingTemperatureBasic (char * seq) {
int gccount;
float temp;
int seqlen;
char nparam_convertNum(char c) seqlen = strlen (seq);
{ gccount = nparam_CountGCContent (seq);
if (c=='A') temp = 64.9 + 41*(gccount - 16.4)/seqlen;
return 1; return temp;
if (c=='C') }
return 2;
if (c=='G')
return 3;
if (c=='T')
return 4;
if (c=='$')
return 5;
return 0;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::GetEntropy(char,char,char,char)
//
// PURPOSE: Retrieve Entropy for given NN-Pair from parameter table
//
// PARAMETERS: x0,x1,y0,y1: Pairs to look up in form
// 5'-x0-x1-3' / 3'-y0-y1-5'
//
// RETURN VALUE:
// float: Entropy dS for given NN pair
//
// REVISION HISTORY
// $ 00sep06 : created LK
// 00dec29 : included dangling end parameters
// 01feb07 : rewritten. looks ugly now, but is FAST! inline.
// #$
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
char nx0=nparam_convertNum(x0);
char nx1=nparam_convertNum(x1);
char ny0=nparam_convertNum(y0);
char ny1=nparam_convertNum(y1);
float answer = ndS(nx0,nx1,ny0,ny1);
//Salt correction Santalucia
if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
if(nx0!=5 && 1<= nx1 && nx1<=4) {
answer += 0.5*nparm->kfac;
}
if(ny1!=5 && 1<= ny0 && ny0<=4) {
answer += 0.5*nparm->kfac;
}
}
//Salt correction Owczarzy
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
float logk = log(nparm->kplus);
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
}
return answer;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::GetEnthalpy(char,char,char,char)
//
// PURPOSE: Retrieve Enthalpy for given NN-Pair from parameter table
//
// PARAMETERS: x0,x1,y0,y1: Pairs to look up in form
// 5'-x0-x1-3' / 3'-y0-y1-5'
//
// RETURN VALUE:
// float: Enthalpy dH for given NN pair
//
// REVISION HISTORY
// $ 00sep06 : created LK
// $ 00dec29 : included dangling end parameters
// $ 01feb07 : rewritten. looks ugly now, but is FAST! inline.
// #$
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
char nx0=nparam_convertNum(x0);
char nx1=nparam_convertNum(x1);
char ny0=nparam_convertNum(y0);
char ny1=nparam_convertNum(y1);
return ndH(nx0,nx1,ny0,ny1);
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::CalcTM(float entropy,float enthalpy)
//
// PURPOSE: Return melting temperature TM for given entropy and enthalpy
// Assuming a one-state transition and using the formula
// TM = dH / (dS + R ln(Ct/4))
// entropy = dS + R ln Ct/4 (must already be included!)
// enthaklpy = dH
// where
// dH = enthalpy
// dS = entropy
// R = Boltzmann factor
// Ct = Strand Concentration
//
// PARAMETERS:
//
// RETURN VALUE:
//
// REVISION HISTORY
// $ 00sep06 : created LK
// $ 01jan07 : modified and corrected
// $ 01feb07 : optimized!!! inline
// $ 01feb09 : changed to include r ln ct in entropy!!!
// #$
float nparam_CalcTM(float entropy,float enthalpy)
{
float tm = 0; // absolute zero - return if model fails!
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
return 0;
if (entropy<0) // avoid division by zero and model errors!
{
tm = enthalpy/entropy;// - kfac; //LKFEB
if (tm<0)
return 0;
}
return tm;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: void CNNParams::AlterTM(float)
//
// PURPOSE: TM can be altered by a new assignment
//
// PARAMETERS: tm_new: new value for TM
//
// RETURN VALUE:
//
// REVISION HISTORY
//
// #$
void nparam_AlterTM(PNNParams nparm, float tm_new)
{
nparm->new_TM = tm_new;
return;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::CalcG(float entropy, float enthalpy)
//
// PURPOSE: return free energy G for given entropy and enthalpy
// Assuming a one-state transition and using the formula
// G = dH - new_TM * dS
// dH = enthalpy
// dS = entropy
// new_TM = value for the optimal melting temperature of
// the last iteration
//
// PARAMETERS: entropy and enthalpy
//
// RETURN VALUE: free Energy value (dG) for the
//
// REVISION HISTORY
//
// #$
float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy)
{
float freeEnergy = -999999999;
if (enthalpy>=forbidden_enthalpy)
return -999999999;
if (entropy<0) // avoid division by zero and model errors!
{
entropy = entropy * -1;
enthalpy = enthalpy * -1;
freeEnergy = enthalpy - nparm->new_TM * entropy;
}
return freeEnergy;
}
///////////////////////////////////////////////////////////////////////////////
// inline function
///////////////////////////////////////////////////////////////////////////////
// FUNCTION: float CNNParams::CalcSelfTM(char*)
//
// PURPOSE: Calculate TM for given sequence against its complement
//
// PARAMETERS:
// char* - Sequence to consider
//
// RETURN VALUE:
// float - Melting temperature in degrees Kelvin
//
// REVISION HISTORY
// $ 01jan09 LK : created
// $ 01feb07 LK : inline.
// #$
float nparam_CalcSelfTM(PNNParams nparm, char* seq)
{
float thedH = 0;
float thedS = nparam_GetInitialEntropy(nparm);
char c1;
char c2;
char c3;
char c4;
unsigned int i;
for ( i=1;i<strlen(seq);i++)
{
c1 = nparam_getComplement(seq[i-1],1);
c2 = nparam_getComplement(seq[i],1);
c3 = seq[i-1];
c4 = seq[i];
if (c3>5)
{
if (c3=='A')
c3=1;
if (c3=='C')
c3=2;
if (c3=='G')
c3=3;
if (c3=='T')
c3=4;
if (c3=='$')
c3=5;
}
if (c4>5)
{
if (c4=='A')
c4=1;
if (c4=='C')
c4=2;
if (c4=='G')
c4=3;
if (c4=='T')
c4=4;
if (c4=='$')
c4=5;
}
thedH += nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
return nparam_CalcTM(thedS,thedH);
}
float nparam_GetInitialEntropy(PNNParams nparm)
{
return -5.9f+nparm->rlogc;
}

View File

@ -2,72 +2,49 @@
* nnparams.h * nnparams.h
* PHunterLib * PHunterLib
* *
* Created by Tiayyba Riaz on 7/2/09. * Nearest Neighbor Model Parameters
* Copyright 2009 __MyCompanyName__. All rights reserved. *
* Created by Tiayyba Riaz on 02/07/09.
* *
*/ */
//============================================================================= #ifndef NNPARAMS_H_
// Module: nnparams.h #define NNPARAMS_H_
// Project: Diploma Thesis - Probe Selection for DNA Microarrays
// Type: header file - Nearest Neighbor Parameters / Model.
// Language: c++
// Compiler: microsoft visual c++ 6.0, unix/linux gcc
// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested)
// Database: none
// Description: class CNNParams - Nearest Neighbor Model Parameters
// Author: kaderali
// Date: 9-12/2000
// Copyright: (c) L. Kaderali, 9/2000 - 12/2000
//
// Revision History
// $ 00sep07 LK : created
// 00dec29 LK : changed to include dangling end data
// 01jan09 LK : included CalcSelfTM function
// 01feb07 LK : optimized
// #$
//=============================================================================
#if !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_)
#define AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#include <math.h> #include <math.h>
#include <string.h> #include <string.h>
//#include "../libecoprimer/ecoprimer.h"
#ifdef _pack
#pragma pack(1)
#endif
// following defines to simplify coding... // following defines to simplify coding...
#define ndH(a,b,c,d) nparm->dH[a][b][c][d] #define ndH(a,b,c,d) nparm->dH[a][b][c][d]
#define ndS(a,b,c,d) nparm->dS[a][b][c][d] #define ndS(a,b,c,d) nparm->dS[a][b][c][d]
#define forbidden_enthalpy 1000000000000000000.0f #define forbidden_enthalpy 1000000000000000000.0f
//#define forbidden_enthalpy_div1000 1000000000000000.0f
// forbidden entropy=-rlogc
// #define forbidden_entropy 30.205986374220235304486574573422f
// Boltzmann factor (cal/degrees C*mol)
#define R 1.987f #define R 1.987f
#define SALT_METHOD_SANTALUCIA 1 #define SALT_METHOD_SANTALUCIA 1
#define SALT_METHOD_OWCZARZY 2 #define SALT_METHOD_OWCZARZY 2
// Strand concentration (assumption!) (M)
// #define Ct 0.000001f #define DEF_CONC_PRIMERS 0.0000008
// r*ln(ct/4) as required by many formulas #define DEF_CONC_SEQUENCES 0
//#define rlogc -30.205986374220235304486574573422f #define DEF_SALT 0.05
#define GETNUMCODE(a) bpencoder[a - 'A']
#define GETREVCODE(a) 5-bpencoder[a - 'A']
extern float forbidden_entropy; extern float forbidden_entropy;
//----------------------------------------------------------------------------- static char bpencoder[] = { 1, // A
// class CNNParams 0, // b
//typedef class CNNParams* PNNParams; 2, // C
0,0,0, // d, e, f
3, // G
0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
4,0, // T,U
0,0,0,0,0}; // v,w,x,y,z
//#pragma GCC visibility push(hidden)
typedef struct CNNParams_st typedef struct CNNParams_st
{ {
//public:
float Ct1; float Ct1;
float Ct2; float Ct2;
float rlogc; float rlogc;
@ -75,30 +52,19 @@ typedef struct CNNParams_st
float kfac; float kfac;
int saltMethod; int saltMethod;
float gcContent; float gcContent;
float new_TM; // ge‰ndert von ML!!! float new_TM;
//CNNParams();
//virtual ~CNNParams();
//private:
float dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign) float dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
float dS[6][6][6][6]; float dS[6][6][6][6];
}CNNParams, * PNNParams; }CNNParams, * PNNParams;
//void nparam_InitParams(PNNParams nparm, float c1=0.000001f, float c2=0.000001f, float kp=1, int sm = SALT_METHOD_SANTALUCIA ); void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm);
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm); int nparam_CountGCContent(char * seq );
void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2); float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_CountGCContent(char * seq ); float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
char nparam_convertNum(char c); float nparam_CalcTM(float entropy,float enthalpy);
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1); float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1); float nparam_GetInitialEntropy(PNNParams nparm) ;
float nparam_CalcTM(float entropy,float enthalpy); float calculateMeltingTemperatureBasic (char * seq);
void nparam_AlterTM(PNNParams nparm, float tm_new); //void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy);
float nparam_CalcSelfTM(PNNParams nparm, char* seq);
float nparam_GetInitialEntropy(PNNParams nparm) ;
char nparam_getComplement(char mychar, int asnum);
int nparam_isMismatch(char,char);
#endif
//#pragma GCC visibility pop
#endif // !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_)