diff --git a/src/Makefile b/src/Makefile index a8011c7..4f75583 100644 --- a/src/Makefile +++ b/src/Makefile @@ -6,10 +6,12 @@ PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC)) SRCS= $(PRIMER_SRC) -LIB= -lecoprimer -lecoPCR -lz -lm +LIB= -lecoprimer -lecoPCR -lthermo -lz -lm LIBFILE= libecoPCR/libecoPCR.a \ - libecoprimer/libecoprimer.a + libecoprimer/libecoprimer.a \ + libthermo/libthermo.a \ + include global.mk @@ -41,6 +43,8 @@ libecoPCR/libecoPCR.a: libecoprimer/libecoprimer.a: $(MAKE) -C libecoprimer +libthermo/libthermo.a: + $(MAKE) -C libthermo ######## # @@ -53,6 +57,7 @@ clean: rm -f $(EXEC) $(MAKE) -C libecoPCR clean $(MAKE) -C libecoprimer clean + $(MAKE) -C libthermo clean diff --git a/src/ecoprimer.c b/src/ecoprimer.c index 3c135fb..3f481b0 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -6,6 +6,7 @@ */ #include "libecoprimer/ecoprimer.h" +#include "libthermo/libthermo.h" #include #include #include @@ -13,14 +14,48 @@ #include #include #include +#include #define VERSION "0.3" /* TR: by default, statistics are made on species level*/ #define DEFAULTTAXONRANK "species" static int cmpprintedpairs(const void* p1,const void* p2); +//float _Z27calculateMeltingTemperature_ (char * seq1, char * seq2); +void* lib_handle = NULL; +float (*calcMelTemp)(char*, char*); +void openlibman () +{ + // Open the library. + char* lib_name = "./libPHunterLib.dylib"; + lib_handle = dlopen(lib_name, RTLD_NOW); + if (lib_handle) { + fprintf(stderr, "[%s] dlopen(\"%s\", RTLD_NOW): Successful\n", __FILE__, lib_name); + } + else { + fprintf(stderr, "[%s] Unable to open library: %s\n", + __FILE__, dlerror()); + exit(EXIT_FAILURE); + } + + // Get the symbol addresses. + calcMelTemp = dlsym(lib_handle, "_Z27calculateMeltingTemperaturePcS_"); + if (calcMelTemp) { + fprintf(stderr, "[%s] dlsym(lib_handle, \"addRating\"): Successful\n", __FILE__); + } + else { + fprintf(stderr, "[%s] Unable to get symbol: %s\n", + __FILE__, dlerror()); + exit(EXIT_FAILURE); + } +} + +void closlibman () +{ + if (lib_handle) dlclose(lib_handle); +} /* ----------------------------------------------- */ /* printout help */ /* ----------------------------------------------- */ @@ -162,8 +197,11 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) bool_t goodtmp; bool_t strand; uint32_t i; + float temp; char *c; + char p1[32]; + char p2[32]; if (!asdirect1) w1=ecoComplementWord(w1,options->primer_length); @@ -189,8 +227,41 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) printf("%6d\t",index); - printf("%s\t",ecoUnhashWord(w1,options->primer_length)); - printf("%s",ecoUnhashWord(w2,options->primer_length)); + c = ecoUnhashWord(w1,options->primer_length); + strcpy (p1, c); + c = ecoUnhashWord(w2,options->primer_length); + strcpy (p2, c); + + printf("%s\t", p1); + printf("%s", p2); + + /*For thermo: first word should be 5'-3'*/ + /*if (!asdirect1) + { + wtmp = ecoComplementWord(w1,options->primer_length); + c = ecoUnhashWord(wtmp,options->primer_length); + strcpy (p1, c); + }*/ + /*For thermo: first word should be 5'-3'*/ + /*if (asdirect2) + { + wtmp = ecoComplementWord(w2,options->primer_length); + c = ecoUnhashWord(wtmp,options->primer_length); + strcpy (p2, c); + }*/ + + /*temp = calculateMeltingTemperature (p1, p2);*/ + //float temp = calculateMeltingTemperature ("CTGTTTACCAAAAACATC", "GGTCTGAACTCAGATCAC"); + + temp = calculateMeltingTemperatureBasic(p1); + printf ("\t%4.3f", temp); + + temp = calculateMeltingTemperatureBasic(p2); + printf ("\t%4.3f", temp); + + printf ("\t%d",countGCContent(p1)); + + printf ("\t%d",countGCContent(p2)); printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]); @@ -401,6 +472,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) for (i=0;i < count;i++) printapair(i,sortedpairs[i],options); + } @@ -479,7 +551,12 @@ int main(int argc, char **argv) //printcurrenttime(); //return 0; - + //openlibman (); + //float temp = calculateMeltingTemperature ("GGTCTGAACTCAGATCAC", "CTGTTTACCAAAAACATC"); + //float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC"); + //printf ("temp = %f\n", temp); + //return 0; + initoptions(&options); while ((carg = getopt(argc, argv, "hfvcUDSd:l:L:e:i:r:R:q:3:s:x:t:O:")) != -1) { @@ -715,11 +792,11 @@ int main(int argc, char **argv) /*TR: Added*/ pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options); - + //fprintf(stderr,"buildPrimerPairs finished\n"); // setoktaxforspecificity (&pairs); printpairs (pairs, &options,taxonomy); - + // closlibman (); //ECOFREE(pairs.pairs,"Free pairs table"); diff --git a/src/global.mk b/src/global.mk index 85b8672..a7bcfef 100644 --- a/src/global.mk +++ b/src/global.mk @@ -1,5 +1,5 @@ MACHINE=MAC_OS_X -LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer +LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer -Llibthermo MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $< CC=gcc diff --git a/src/libthermo/Makefile b/src/libthermo/Makefile new file mode 100644 index 0000000..fefcbc7 --- /dev/null +++ b/src/libthermo/Makefile @@ -0,0 +1,26 @@ + +SOURCES = dinkelbach.c \ + galign.c \ + libthermo.c \ + nnparams.c \ + thermalign.c + +SRCS=$(SOURCES) + +OBJECTS= $(patsubst %.c,%.o,$(SOURCES)) + +LIBFILE= libthermo.a +RANLIB= ranlib + + +include ../global.mk + + +all: $(LIBFILE) + +clean: + rm -rf $(OBJECTS) $(LIBFILE) + +$(LIBFILE): $(OBJECTS) + ar -cr $@ $? + $(RANLIB) $@ diff --git a/src/libthermo/dinkelbach.P b/src/libthermo/dinkelbach.P new file mode 100644 index 0000000..179db99 --- /dev/null +++ b/src/libthermo/dinkelbach.P @@ -0,0 +1,5 @@ +dinkelbach.o dinkelbach.P : dinkelbach.c nnparams.h /usr/include/math.h \ + /usr/include/architecture/i386/math.h /usr/include/sys/cdefs.h \ + /usr/include/string.h /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h galign.h \ + dinkelbach.h diff --git a/src/libthermo/dinkelbach.c b/src/libthermo/dinkelbach.c new file mode 100644 index 0000000..9bacffc --- /dev/null +++ b/src/libthermo/dinkelbach.c @@ -0,0 +1,76 @@ +/* + * dinkelbach.cpp + * PHunterLib + * + * Created by Tiayyba Riaz on 6/7/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//#include +//#include +//#include +#include "nnparams.h" +#include "galign.h" +#include "dinkelbach.h" + +void dnkl_initdinkelbach(pdinkelbach dnkl, PNNParams myDiParams, GGAlign mGAlign) +{ + dnkl->myDinkParams = myDiParams; + dnkl->myGAlign = mGAlign; +} + + +int dnkl_iteration(pdinkelbach dnkl, float TempK) +{ + int iteration = 0; + nparam_AlterTM(dnkl->myDinkParams, TempK); + float precursor_oldTM = TempK; + float newTM = TempK; + float oldTM = TempK, lambda, oldlambda; + lambda = 999999999999999999.9; + int positive=0; + + do + { + iteration++; + galn_InitBorder(dnkl->myGAlign); + galn_CalculateTable(dnkl->myGAlign, 1); + oldlambda=lambda; + lambda =galn_GetFreeEnergyK(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj,newTM); + if(lambda < 0.0) + { + if(!positive) + { + //Initial melting temperature greater than expected + newTM = galn_GetEnthalpy(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj)/galn_GetEntropy(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj)-1; + lambda = oldlambda; + nparam_AlterTM(dnkl->myDinkParams, newTM); + } + else + { + precursor_oldTM = oldTM; + } + } + else + { + positive = 1; + oldTM = newTM; + newTM = galn_GetMeltingTempK(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj); + nparam_AlterTM(dnkl->myDinkParams, newTM); + } + } + while(!positive || (newTM - oldTM > 0.001 && (lambda>0.0)&&(lambda 0.001 && ((lambda<0.0)||(lambda>oldlambda))) + { + iteration++; + nparam_AlterTM(dnkl->myDinkParams, precursor_oldTM); + galn_InitBorder(dnkl->myGAlign); + galn_CalculateTable(dnkl->myGAlign, 1); + lambda = galn_GetFreeEnergyK(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj,precursor_oldTM); + newTM = galn_GetMeltingTempK(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj); + nparam_AlterTM(dnkl->myDinkParams, newTM); + } + return iteration; +} diff --git a/src/libthermo/dinkelbach.h b/src/libthermo/dinkelbach.h new file mode 100644 index 0000000..c90c8c1 --- /dev/null +++ b/src/libthermo/dinkelbach.h @@ -0,0 +1,36 @@ +/* + * dinkelbach.h + * PHunterLib + * + * Created by Tiayyba Riaz on 7/2/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//Module: Achievement of the Dinkelbach algorithm + +#ifndef __dinkelbach_h +#define __dinkelbach_h + +//#include +//#include +//#include +#include "nnparams.h" +#include "galign.h" + +//using namespace std; + +//typedef class dinkelbach* DINKEL; +//#pragma GCC visibility push(hidden) +typedef struct dinkelbach_st +{ + PNNParams myDinkParams; + GGAlign myGAlign; + +}dinkelbach, * pdinkelbach; +//#pragma GCC visibility pop + +void dnkl_initdinkelbach(pdinkelbach dnkl, PNNParams, GGAlign); +int dnkl_iteration(pdinkelbach dnkl, float TempK); + +#endif /* __dinkelbach_h */ diff --git a/src/libthermo/galign.P b/src/libthermo/galign.P new file mode 100644 index 0000000..039486a --- /dev/null +++ b/src/libthermo/galign.P @@ -0,0 +1,16 @@ +galign.o galign.P : galign.c /usr/include/math.h \ + /usr/include/architecture/i386/math.h /usr/include/sys/cdefs.h \ + /usr/include/assert.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/stdlib.h \ + /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h galign.h \ + nnparams.h /usr/include/string.h diff --git a/src/libthermo/galign.c b/src/libthermo/galign.c new file mode 100644 index 0000000..0f96688 --- /dev/null +++ b/src/libthermo/galign.c @@ -0,0 +1,685 @@ +/* + * galign.cpp + * PHunterLib + * + * Created by Tiayyba Riaz on 6/7/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//============================================================================= +// Module: galign.cpp +// Project: Cubic Project - Calculation of melting temperature and free +// energy of two DNA strands +// Type: implementation - Thermodynamic Alignment. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class GAlign - Thermodynamic Alignment Algorithm +// Author: leber +// Date: 01/2002 - 02/2002 +// Copyright: (c) L. Kaderali & M. Leber, 01/2002 - 02/2002 +// +// Revision History +// $ 00sep04 LK : created +// $ 00dec30 LK : changed to do local alignment of probe against +// one entire sequence +// $ 01feb07 LK : optimized! +// $ 01aug06 LK : corrected, included salt and concentration input; +// $ made true local alignment (problem with initial mismatches!) +// #$ +//============================================================================= + +#include +#include +#include +#include + +#include "galign.h" + + +#define nndH(a,b,c) (*(galn->dH+((b)*(galn->maxseq1len+1)*3+(a)*3+(c)))) +#define nndS(a,b,c) (*(galn->dS+((b)*(galn->maxseq1len+1)*3+(a)*3+(c)))) + + + +/////////////////////////////////////////////////////////////////////////////// +// Construction/Destruction +/////////////////////////////////////////////////////////////////////////////// + +//------------------------ Constructor ---------------------------------------- +// Initialize Thermalign Object. This will allocate memory for the dynamic +// programming matrices for both entropy and enthalpy of the present +// alignment. The max length of the two sequences to align has to be passed +// to the routine. Also, an initialized CNNParams Object must be passed to +// CThermAlign. Object will be initialized only once, sequences can be +// passed to it later. + +//int s1lmax,s2lmax; // global requiered for DEBUGGING + +void galn_initGAlign(GGAlign galn, int ALength, int BLength, PNNParams myNNParams) +{ + // Create dynamic programming matrices for entropy and enthalpy. + // The first two dimensions are for the two strings respectively, the + // third dimensions differentiates the alignment according to its end: + // type 0 Alignments will be those that align x(i+1) with y(j+1), + // type 1 Alignments align x(i+1) with a gap + // type 2 Alignments align y(i+1) with a gap + // All three of these types must separately be considered to select the + // optimum alignment in the next iteration. + // The alignment algorithm proceeds analogous to the well known + // Smith-Waterman algorithm for global alignments. + + galn->dH = (float *) calloc((ALength+1)*(BLength+1)*3, sizeof (float)); + galn->dS = (float *) calloc((ALength+1)*(BLength+1)*3, sizeof (float)); +// s1lmax=ALength; +// s2lmax=BLength; + + if ((galn->dH==NULL)||(galn->dS==NULL)) + { + printf("Fatal Error: Out of memory!\n"); + exit(-1); + } + + galn->GNNParams = myNNParams; + galn->seq1len = ALength; + galn->maxseq1len = galn->seq1len; + galn->seq2len = BLength; + + memset(galn->dH,0,(ALength+1)*(BLength+1)*3*sizeof(float)); + memset(galn->dS,0,(ALength+1)*(BLength+1)*3*sizeof(float)); + + // set dH / dS entries all to zero! + #ifdef _pack + // this is FAST, but works only if #pragma pack(1) has been set... + #else + // the following is slower, but works also when padding bytes have + // been inserted in the array for alignment by the compiler... + // time is not really critical here, as this initialization is done + // only once... +/* int a, b; + for ( a=0;a<=ALength;a++) + for ( b=0;b<=BLength;b++) + { + nndH(a,b,0)=0.0f; + nndH(a,b,1)=0.0f; + nndH(a,b,2)=0.0f; + nndS(a,b,0)=0.0f; + nndS(a,b,1)=0.0f; + nndS(a,b,2)=0.0f; + }*/ + #endif + galn_InitBorder(galn); // also need to do only once... +// forbidden_entropy = (-(GNNParams->R)*(float)log((GNNParams->Ct)/4.0f)); + return; +} + +//------------------------ Destructor ----------------------------------------- +// Free memory + +void galn_finiGAlign(GGAlign galn) +{ + free (galn->dH); + free (galn->dS); + return; +} + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void galn_InitStrings(char* s1, +// char* s2, +// int s1len, +// int s2len); +// +// PURPOSE: Initialize strings for alignment table! +// +// PARAMETERS: +// s1: Target sequence +// s2: Probe sequence +// s1len: Target sequence length +// s2len: Probe sequence length +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 01jan03 LK : created +// $ 01feb07 LK : removed maximum local stuff +// #$ + +void galn_InitStrings(GGAlign galn, char* s1,char* s2,int s1len ,int s2len,int minxpos) +{ + galn->seq1=s1; galn->seq1len=s1len; + galn->seq2=s2; galn->seq2len=s2len; + nparam_UpdateParams(galn->GNNParams, s1,s2); +/* lk01feb07: removed maxloc-stuff + // if maximum local is not in reused subtable, need to reset! + if ((maxlocj>=minxpos)||(maxloci!=seq1len)) + { + maxloci=0; maxlocj=0; maxlocg=0; + } +*/ +// InitBorder(); //lk01feb07: removed, as it suffices if this is done once in constructor + return; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void galn_InitBorder(); +// +// PURPOSE: Initialize the lower and left border of the dynamic +// programming table. +// +// PARAMETERS: +// none +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 00sep06 LK : created +// $ 00dec30 LK : complete revision - simply init table now, no +// more saving etc... +// $ 01feb07 LK : removed local alignment stuff and optimized +// #$ + +void galn_InitBorder(GGAlign galn) +{ + // Initialization for new alignment! + galn->maxlocg = -99999; // maximum melting temperature found (kelvin) + galn->maxloci = 0; + galn->maxlocj = 0; + galn->maxloct = 0; + int i, j; + + for ( i=0; i<=galn->seq1len; i++) + { + nndH(i,0,0) = 0.0f; nndH(i,0,1) = 0.0f; nndH(i,0,2) = 0.0f; + nndS(i,0,0) = nparam_GetInitialEntropy(galn->GNNParams); + nndS(i,0,1) = nparam_GetInitialEntropy(galn->GNNParams); + nndS(i,0,2) = nparam_GetInitialEntropy(galn->GNNParams); + } + for ( j=1; j<=galn->seq2len; j++) { + nndH(0,j,0) = 0.0f; nndH(0,j,1) = 0.0f; nndH(0,j,2) = 0.0f; + nndS(0,j,0) = nparam_GetInitialEntropy(galn->GNNParams); + nndS(0,j,1) = nparam_GetInitialEntropy(galn->GNNParams); + nndS(0,j,2) = nparam_GetInitialEntropy(galn->GNNParams); + } + + return; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: galn_CalculateTable(); +// +// PURPOSE: Compute the entire dynamic programming table cell by cell. +// Requieres that InitBorder has been called. +// Computations will be done in the usual rowwise manner. +// +// PARAMETERS: +// int startRow: Row that computation shall commence at! +// +// RETURN VALUE: +// void - Data will be written directly to dH and dS in this object +// +// REVISION HISTORY +// $ 00sep06 : created LK +// 00dec31 : modified to begin at a specified row! +// #$ + +void galn_CalculateTable(GGAlign galn, int startRow) +{ +// #define forbidden_enthalpy 1000000000000000000.0f + register int i; + register int iminusone; + register int j; + register int jminusone; + // Determine previous character! + register char prevchari; + register char prevcharj; + register char seq1char; + register char seq2char; + register float entropy1; + register float entropy2; + register float entropy3; + register float enthalpy1; + register float enthalpy2; + register float enthalpy3; + register float g1; + register float g2; + register float g3; + float maxg; +// ofstream dboutstream("debug.out"); +// assert(startRow>0); + for (j=startRow; j<=galn->seq2len; j++) + { + if (j>1) + prevcharj = galn->seq2[j-2]; + else + prevcharj = 0; + jminusone = j - 1; + seq2char = galn->seq2[jminusone]; + for (i=1; i<=galn->seq1len; i++) + { + // local variables + if (i>1) + prevchari = galn->seq1[i-2]; + else + prevchari = 0; + iminusone = i - 1; + seq1char=galn->seq1[iminusone]; + + // --------------- Upper cell: Alignment of seq1[i] with seg2[j] + entropy1 = nndS(iminusone,jminusone,0); + enthalpy1 = nndH(iminusone,jminusone,0) ; + entropy2 = nndS(iminusone,jminusone,1); + enthalpy2 = nndH(iminusone,jminusone,1); + entropy3 = nndS(iminusone,jminusone,2); + enthalpy3 = nndH(iminusone,jminusone,2); + if (enthalpy1GNNParams, prevchari,seq1char, prevcharj, seq2char); + enthalpy1 +=nparam_GetEnthalpy(galn->GNNParams, prevchari, seq1char, prevcharj, seq2char); + + } + if (enthalpy2GNNParams, prevchari,seq1char, 0, seq2char); // 0 is gap! + entropy2 += nparam_GetEntropy(galn->GNNParams, prevchari,seq1char, 0, seq2char); + } + if(enthalpy3GNNParams, 0, seq1char,prevcharj, seq2char); + entropy3 +=nparam_GetEntropy(galn->GNNParams, 0, seq1char, prevcharj, seq2char); + + } + // choose optimum combination + g1 = nparam_CalcG(galn->GNNParams, entropy1, enthalpy1); + g2 = nparam_CalcG(galn->GNNParams, entropy2, enthalpy2); + g3 = nparam_CalcG(galn->GNNParams, entropy3, enthalpy3); + if ((g1>=g2)&&(g1>=g3)) + { + nndS(i,j,0) = entropy1; + nndH(i,j,0) = enthalpy1; + maxg=g1; + } + else if (g2>=g3) + { + nndS(i,j,0) = entropy2; + nndH(i,j,0) = enthalpy2; + maxg=g2; + } + else + { + nndS(i,j,0) = entropy3; + nndH(i,j,0) = enthalpy3; + maxg=g3; + } + // set to zero if alignment so far is mismatches only (tm will be zero -> no need to reset maxg)! + if(nndH(i,j,0)==0) + nndS(i,j,0)=nparam_GetInitialEntropy(galn->GNNParams); + // check if local maximum found! + if (((i==galn->seq1len)||(j==galn->seq2len))&&(maxg>=galn->maxlocg)) + { + galn->maxlocg=maxg; + galn->maxloci=i; + galn->maxlocj=j; + galn->maxloct=0; + } + // set to zero if alignment so far is mismatches only! + if (nndH(i,j,0)==0) + nndS(i,j,0)=nparam_GetInitialEntropy(galn->GNNParams); + + // --------------- Middle cell: Alignment of seq1[i] with '-' + // following changes lk 01jan08 + // we do not allow $- in the beginning, therefore set to forbidden if + // j=1 + if (j==1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + // also, disallow -$ in the end, therefore forbid if j=seq2len-1 + else if (j==galn->seq2len-1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + else + { + // end lk01jan08 + // -- entropy + entropy1 = nndS(iminusone,j,0); + entropy2 = nndS(iminusone,j,1); + enthalpy1 = nndH(iminusone,j,0); + enthalpy2 = nndH(iminusone,j,1); + if (enthalpy1GNNParams, prevchari, seq1char, seq2char, 0); + enthalpy1 +=nparam_GetEnthalpy(galn->GNNParams, prevchari,seq1char, seq2char, 0); + } + if (enthalpy2GNNParams, prevchari, seq1char, 0, 0); + enthalpy2 +=nparam_GetEnthalpy(galn->GNNParams, prevchari, seq1char, 0, 0); + } + } + // -- choose optimum combination + g1 = nparam_CalcG(galn->GNNParams, entropy1, enthalpy1); + g2 = nparam_CalcG(galn->GNNParams, entropy2, enthalpy2); + if (g1>=g2) + { + nndS(i,j,1)=entropy1; + nndH(i,j,1)=enthalpy1; + maxg=g1; + } + else + { + nndS(i,j,1)=entropy2; + nndH(i,j,1)=enthalpy2; + maxg=g2; + } + // check if local maximum found! + if (((i==galn->seq1len)||(j==galn->seq2len))&&(maxg>=galn->maxlocg)) + { + galn->maxlocg=maxg; + galn->maxloci=i; + galn->maxlocj=j; + galn->maxloct=1; + } + // set to zero if alignment so far is mismatches only! + if (nndH(i,j,1)==0) + nndS(i,j,1)=nparam_GetInitialEntropy(galn->GNNParams); + + // --------------- Lower cell: Alignment of '-' with seq2[j] + // following changes lk 01jan08 + // we do not allow $- in the beginning, therefore set to forbidden if + // i=1 + if (i==1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + // also, disallow -$ in the end, therefore forbid if i=seq1len-1 + else if (i==galn->seq1len-1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + else + { + // end lk01jan08 + entropy1 = nndS(i,jminusone,0); + entropy2 = nndS(i,jminusone,2); + enthalpy1 = nndH(i,jminusone,0); + enthalpy2 = nndH(i,jminusone,2); + if (enthalpy1GNNParams, seq1char, 0, prevcharj, seq2char); + enthalpy1 +=nparam_GetEnthalpy(galn->GNNParams, seq1char, 0, prevcharj, seq2char); + } + if(enthalpy2GNNParams, 0,0, prevcharj, seq2char); + enthalpy2 +=nparam_GetEnthalpy(galn->GNNParams, 0, 0, prevcharj, seq2char); + } + } + // -- choose optimum combination + g1 = nparam_CalcG(galn->GNNParams, entropy1, enthalpy1); + g2 = nparam_CalcG(galn->GNNParams, entropy2, enthalpy2); + if (g1>=g2) + { + nndS(i,j,2)=entropy1; + nndH(i,j,2)=enthalpy1; + maxg=g1; + } + else + { + nndS(i,j,2)=entropy2; + nndH(i,j,2)=enthalpy2; + maxg=g2; + } + // check if local maximum found! + if (((i==galn->seq1len)||(j==galn->seq2len))&&(maxg>=galn->maxlocg)) + { + galn->maxlocg=maxg; + galn->maxloci=i; + galn->maxlocj=j; + galn->maxloct=2; + } + // set to zero if alignment so far is mismatches only! + if (nndH(i,j,2)==0) + nndS(i,j,2)=nparam_GetInitialEntropy(galn->GNNParams); + + + /* + dboutstream<<"After cell ("<seq1len;i++) + { + for( j=0;j<=galn->seq2len;j++) + { + printf ("%f ", nndH(i,j,level)); + } + printf ("\n"); + } + printf ("\n"); +} + +void galn_printEntropyTable(GGAlign galn, int level) +{ + int i, j; + for( i=0;i<=galn->seq1len;i++) + { + for( j=0;j<=galn->seq2len;j++) + { + printf ("%f ", nndH(i,j,level)); + } + printf ("\n"); + } + printf ("\n"); +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetEntropy(int i, int j); +// +// PURPOSE: Return Entropy for given cell +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Entropy (dS) +// +// REVISION HISTORY +// $ 00sep23 : created LK +// #$ + +/* inline */ float galn_GetEntropy(GGAlign galn, int i, int j) +{ + float entropy; + + // first, need to determine optimum values for dG and dH + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(nndS(i,j,0),nndH(i,j,0)); + tm1 = nparam_CalcTM(nndS(i,j,1),nndH(i,j,1)); + tm2 = nparam_CalcTM(nndS(i,j,2),nndH(i,j,2)); + if ((tm0>=tm1)&&(tm0>=tm2)) + entropy = nndS(i,j,0); + else if ((tm1>=tm0)&&(tm1>=tm2)) + entropy = nndS(i,j,1); + else + entropy = nndS(i,j,2); + + return entropy; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetEnthalpy(int i, int j); +// +// PURPOSE: Return Enthalpy for given cell +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Enthalpy (dS) +// +// REVISION HISTORY +// $ 00sep23 : created LK +// #$ + +/* inline */ float galn_GetEnthalpy(GGAlign galn, int i, int j) +{ + float enthalpy; + + // first, need to determine optimum values for dG and dH + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(nndS(i,j,0),nndH(i,j,0)); + tm1 = nparam_CalcTM(nndS(i,j,1),nndH(i,j,1)); + tm2 = nparam_CalcTM(nndS(i,j,2),nndH(i,j,2)); + if ((tm0>=tm1)&&(tm0>=tm2)) + enthalpy = nndH(i,j,0); + else if ((tm1>=tm0)&&(tm1>=tm2)) + enthalpy = nndH(i,j,1); + else + enthalpy = nndH(i,j,2); + + return enthalpy; +} + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergyK(int i, int j, float t); +// +// PURPOSE: Return free Energy for given cell at given temperature +// +// PARAMETERS: +// i, j - Cell coordinates +// t - Temperature in Kelvin +// +// RETURN VALUE: +// float - free Energy value (dG) at given temperature (Kelvin) +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float galn_GetFreeEnergyK(GGAlign galn, int i, int j, float t) +{ + // dG = dH - T * dS + + float entropy; + float enthalpy; + + // first, need to determine optimum values for dG and dH + entropy = -galn_GetEntropy(galn, i,j); + enthalpy = -galn_GetEnthalpy(galn, i,j); + + return enthalpy - t * entropy; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergyC(int i, int j, int t); +// +// PURPOSE: Return free Energy for given cell at given temperature +// +// PARAMETERS: +// i, j - Cell coordinates +// t - Temperature in Celsius +// +// RETURN VALUE: +// float - free Energy value (dG) at melting temperature +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float galn_GetFreeEnergyC(GGAlign galn, int i, int j, float t) +{ + // dG = dH - T * dS + + float entropy; + float enthalpy; + + // first, need to determine optimum values for dG and dH + entropy = galn_GetEntropy(galn, i,j); + enthalpy = galn_GetEnthalpy(galn, i,j); + + return enthalpy - (t + 273.15f) * entropy; +} + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float galn_GetMeltingTempC(int i, int j) +// +// PURPOSE: Calculate TM for given cell. Return in Celsius. +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Melting Temperature TM +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float galn_GetMeltingTempC(GGAlign galn, int i, int j) +{ + return galn_GetMeltingTempK(galn, i,j) - 273.15f; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float galn_GetMeltingTempK(int i, int j) +// +// PURPOSE: Calculate TM for given cell. Return in Kelvin. +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Melting Temperature TM +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float galn_GetMeltingTempK(GGAlign galn, int i, int j) +{ + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(nndS(i,j,0),nndH(i,j,0)); + tm1 = nparam_CalcTM(nndS(i,j,1),nndH(i,j,1)); + tm2 = nparam_CalcTM(nndS(i,j,2),nndH(i,j,2)); + float maxg; + if ((tm0>=tm1)&&(tm0>=tm2)) + maxg=tm0; + else if ((tm1>=tm0)&&(tm1>=tm2)) + maxg=tm1; + else + maxg=tm2; + if (maxg<0) + maxg=0; + return maxg; +} + + diff --git a/src/libthermo/galign.h b/src/libthermo/galign.h new file mode 100644 index 0000000..aad5541 --- /dev/null +++ b/src/libthermo/galign.h @@ -0,0 +1,97 @@ +/* + * galign.h + * PHunterLib + * + * Created by Tiayyba Riaz on 7/2/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//============================================================================= +// Module: galign.h +// Project: Cubic Project - Calculation of melting temperature and free +// energy of two DNA strands +// Type: header file - Thermodynamic Alignment. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class GAlign - Thermodynamic Alignment Algorithm +// Author: leber +// Date: 01/2002 - 02/2002 +// Copyright: (c) L. Kaderali & M. Leber, 01/2002 - 02/2002 +// +// Revision History +// $ 00sep04 LK : created +// $ 00dec30 LK : changed to do local alignment of probe against +// one entire sequence +// $ 01feb07 LK : optimized +// #$ +//============================================================================= + +#if !defined(AFX_GAlign_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_) +#define AFX_GAlign_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +//#include +//#include +//#include +#include "nnparams.h" // Nearest Neighbor Parameters +//#include "../gsfxtree/gsfxtree.h" // Suffix Tree Stuff + +//using namespace std; + +#ifdef _pack +#pragma pack(1) +#endif + +//----------------------------------------------------------------------------- +// class GAlign +//#pragma GCC visibility push(hidden) +//typedef class GAlign* GGAlign; + +typedef struct GAlign_st +{ +//public: +// lk01feb07: removed maxlocstuff as not requiered by thermtreealign... + float maxlocg; // maximum local dG value found + int maxloci; // i position thereof + int maxlocj; // j position thereof + int maxloct; // and type of maximum alignment! + int targetNumber; // id number of target sequence +// PGSfxLeaf probeNode; // identifier for probe sequence +//private: + char *seq1; // Sequence 1 + char *seq2; // Sequence 2 + int seq1len; // Length of Sequence 1 + int seq2len; // Length of Sequence 2 + int maxseq1len; // length of longest target... + float *dH; // Dynamic Programming Table for Entropy + float *dS; // Dynamic Programming Table for Enthalpy + PNNParams GNNParams; // Nearest Neighbor parameters +// float forbidden_entropy; +}GAlign, * GGAlign; + +void galn_initGAlign(GGAlign galn, int, int, PNNParams); +void galn_finiGAlign(GGAlign galn); +//void galn_InitStrings(GGAlign galn, char*,char*,int,int,int=0); +void galn_InitStrings(GGAlign galn, char*,char*,int,int,int); +void galn_InitBorder(GGAlign galn); +//void galn_CalculateTable(GGAlign galn, int=1); +void galn_CalculateTable(GGAlign galn, int); +float galn_GetEntropy(GGAlign galn, int, int); +float galn_GetEnthalpy(GGAlign galn, int, int); +float galn_GetFreeEnergyK(GGAlign galn, int, int, float); +float galn_GetFreeEnergyC(GGAlign galn, int, int, float); +float galn_GetMeltingTempC(GGAlign galn, int, int); +float galn_GetMeltingTempK(GGAlign galn, int, int); + +void galn_printEnthalpyTable(GGAlign galn, int level); +void galn_printEntropyTable(GGAlign galn, int level); + +//#pragma GCC visibility pop +#endif //!defined(AFX_GAlign_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_) + diff --git a/src/libthermo/libthermo.P b/src/libthermo/libthermo.P new file mode 100644 index 0000000..f0802d5 --- /dev/null +++ b/src/libthermo/libthermo.P @@ -0,0 +1,15 @@ +libthermo.o libthermo.P : libthermo.c /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h libthermo.h \ + nnparams.h /usr/include/math.h /usr/include/architecture/i386/math.h \ + /usr/include/string.h thermalign.h galign.h dinkelbach.h diff --git a/src/libthermo/libthermo.a b/src/libthermo/libthermo.a new file mode 100644 index 0000000..00900a2 Binary files /dev/null and b/src/libthermo/libthermo.a differ diff --git a/src/libthermo/libthermo.c b/src/libthermo/libthermo.c new file mode 100644 index 0000000..2fea2a4 --- /dev/null +++ b/src/libthermo/libthermo.c @@ -0,0 +1,190 @@ +/* + * PHunterLib.cp + * PHunterLib + * + * Created by Tiayyba Riaz on 6/7/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include +#include "libthermo.h" +#include "nnparams.h" +#include "thermalign.h" +#include "galign.h" +#include "dinkelbach.h" + + +//#define EXPORT __attribute__((visibility("default"))) + +//char targetsFile [1000]=""; +//char nonTargetsFile [1000]=""; +//char primersFile [1000]=""; +//char primersOutputFile [1000]=""; +//int minLength = DEF_MIN_PRIMER_LENGTH; +//int maxLength = DEF_MAX_PRIMER_LENGTH; +//int productMinLength = DEF_MIN_PROD_LENGTH; +//int productMaxLength = DEF_MAX_PROD_LENGTH; +//char forwardPrimer [PRIMERS_MAX_SIZE] = DEF_FORWARD_PRIMER; +//char reversePrimer [PRIMERS_MAX_SIZE] = DEF_REVERSE_PRIMER; +//int beginPosForward = DEF_BEGINPOS_FORWARD; +//int endPosForward = DEF_ENDPOS_FORWARD; +//int beginPosReverse = DEF_BEGINPOS_REVERSE; +//int endPosReverse = DEF_ENDPOS_REVERSE; +//char maskTargets [PRIMERS_MAX_SIZE] = DEF_MASK_TARGETS; +//char maskNonTargets [PRIMERS_MAX_SIZE] = DEF_MASK_NONTARGETS; +//char degeneracy[PRIMERS_MAX_SIZE]=DEF_DEGENERACY; +//int nseqForCandidates = DEF_SEQ_FOR_CANDS; +//float minCoverageTargets = DEF_MIN_COVERAGE_TARGETS; +//float maxCoverageNonTargets = DEF_MAX_COVERAGE_NONTARGETS; +//float maxSelfScore = DEF_MAX_SELF_SCORE; +//float maxEndScore = DEF_MAX_END_SCORE; +//float minGCContent = DEF_MIN_GCCONTENT; +//float maxGCContent = DEF_MAX_GCCONTENT; +//int gcClamp = DEF_GCCLAMP; +//int maxPolyX = DEF_MAX_POLY_X; +float concPrimers = DEF_CONC_PRIMERS; +float concSequences = DEF_CONC_SEQUENCES; +float salt = DEF_SALT; +int salMethod = SALT_METHOD_SANTALUCIA; +//float minTempTargets=DEF_MIN_TEMP_TARGETS; +//float maxTempTargets=DEF_MAX_TEMP_TARGETS; +//float maxTempNonTargets=DEF_MAX_TEMP_NONTARGETS; +//float deltaTempNonTargets=DEF_DELTA_TEMP_NONTARGETS; +//float maxPairTempDiff=DEF_MAX_PAIR_TEMP_DIFF; +//char primersLabel [1000] = DEF_PRIMERS_LABEL; +//int fullStats = DEF_FULL_STATS; +//char seqRank [10000] = DEF_SEQ_RANK; + + +int countGCContent(char * seq ) { + int lseq = strlen(seq); + int k; + int count = 0; + for( k=0;k0;i--) { + seq[i]=seq[i-1]; + } + seq[0] = '$'; +} + +void removeDollarSigns(char * seq) { + int n = strlen(seq); + int i; + for( i=0;i +#include +#include +#include"nnparams.h" + +#ifdef _pack +#pragma pack(1) +#endif + + + +float forbidden_entropy; + +/////////////////////////////////////////////////////////////////////////////// +// Construction/Destruction +/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void CNNParams::InitParams() +// +// PURPOSE: Initialize nearest neighbor parameters. For now, simply set +// params as requiered. Extend to read from file sometime. +// +// PARAMETERS: +// none +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 00sep06 : created LK +// 00dec17 : modified to use new parameters LK +// 00dec29 : included dangling end data LK +// #$ + +void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) +{ + nparm->Ct1 = c1; + nparm->Ct2 = c2; + nparm->kplus = kp; + int maxCT = 1; + if(nparm->Ct2 > nparm->Ct1) + { + maxCT = 2; + } + float ctFactor; + if(nparm->Ct1 == nparm->Ct2) + { + ctFactor = nparm->Ct1/2; + } + else if (maxCT == 1) + { + ctFactor = nparm->Ct1-nparm->Ct2/2; + } + else + { + ctFactor = nparm->Ct2-nparm->Ct1/2; + } + nparm->rlogc = R * log(ctFactor); + forbidden_entropy = nparm->rlogc; + nparm->kfac = 0.368 * log (nparm->kplus); + nparm->saltMethod = sm; + int x,y,a,b; // variables used as counters... + + // Set all parameters to zero! + memset(nparm->dH,0,sizeof(nparm->dH)); + memset(nparm->dS,0,sizeof(nparm->dS)); + + // Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small! + for (x=1;x<=4;x++) + { + for (y=1;y<=4;y++) + { + ndH(0,x,y,0)=forbidden_enthalpy; + ndS(0,x,y,0)=forbidden_entropy; + ndH(x,0,0,y)=forbidden_enthalpy; + ndS(x,0,0,y)=forbidden_entropy; + ndH(x,0,y,0)=forbidden_enthalpy; + ndS(x,0,y,0)=forbidden_entropy; + // forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap! + ndH(x,5,y,0)=forbidden_enthalpy; + ndS(x,5,y,0)=forbidden_entropy; + ndH(x,0,y,5)=forbidden_enthalpy; + ndS(x,0,y,5)=forbidden_entropy; + ndH(5,x,0,y)=forbidden_enthalpy; + ndS(5,x,0,y)=forbidden_entropy; + ndH(0,x,5,y)=forbidden_enthalpy; + ndS(0,x,5,y)=forbidden_entropy; + // forbid X$/-Y etc. + ndH(x,5,0,y)=forbidden_enthalpy; + ndS(x,5,0,y)=forbidden_entropy; + ndH(x,0,5,y)=forbidden_enthalpy; + ndS(x,0,5,y)=forbidden_entropy; + ndH(5,x,y,0)=forbidden_enthalpy; + ndS(5,x,y,0)=forbidden_entropy; + ndH(0,x,y,5)=forbidden_enthalpy; + ndS(0,x,y,5)=forbidden_entropy; + + } + // also, forbid x-/-- and --/x-, i.e. no two inner gaps paired + ndH(x,0,0,0)=forbidden_enthalpy; + ndS(x,0,0,0)=forbidden_entropy; + ndH(0,0,x,0)=forbidden_enthalpy; + ndS(0,0,x,0)=forbidden_entropy; + // x-/-$ + ndH(x,0,0,5)=forbidden_enthalpy; + ndS(x,0,0,5)=forbidden_entropy; + ndH(5,0,0,x)=forbidden_enthalpy; + ndS(5,0,0,x)=forbidden_entropy; + ndH(0,5,x,0)=forbidden_enthalpy; + ndS(x,0,0,5)=forbidden_entropy; + ndH(0,x,5,0)=forbidden_enthalpy; + ndS(0,x,5,0)=forbidden_entropy; + } + // forbid --/-- + ndH(0,0,0,0)=forbidden_enthalpy; + ndS(0,0,0,0)=forbidden_entropy; + + ndH(5,0,0,0)=forbidden_enthalpy; + ndS(5,0,0,0)=forbidden_entropy; + ndH(0,0,5,0)=forbidden_enthalpy; + ndS(0,0,5,0)=forbidden_entropy; + ndH(0,5,5,0)=forbidden_enthalpy; + ndS(0,5,5,0)=forbidden_entropy; + + // Interior loops (double Mismatches) + #define iloop_entropy -0.97f + #define iloop_enthalpy 0.0f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + for (a=1; a<=4; a++) + for (b=1; b<=4; b++) + // AT and CG pair, and as A=1, C=2, G=3, T=4 this means + // we have Watson-Crick pairs if (x+a==5) and (y+b)==5. + if (!((x+a==5)||(y+b==5))) + { + // No watson-crick-pair, i.e. double mismatch! + // set enthalpy/entropy to loop expansion! + ndH(x,y,a,b) = iloop_enthalpy; + ndS(x,y,a,b) = iloop_entropy; + } + + // xy/-- and --/xy (Bulge Loops of size > 1) + #define bloop_entropy -1.3f + #define bloop_enthalpy 0.0f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + { + ndH(x,y,0,0) = bloop_enthalpy; + ndS(x,y,0,0) = bloop_entropy; + ndH(0,0,x,y) = bloop_enthalpy; + ndS(0,0,x,y) = bloop_entropy; + } + + // x-/ya abd xa/y- as well as -x/ay and ax/-y + // bulge opening and closing parameters with + // adjacent matches / mismatches + // obulge_mism and cbulge_mism chosen so high to avoid + // AAAAAAAAA + // T--G----T + // being better than + // AAAAAAAAA + // TG------T + #define obulge_match_H (-2.66f * 1000) + #define obulge_match_S -14.22f + #define cbulge_match_H (-2.66f * 1000) + #define cbulge_match_S -14.22f + #define obulge_mism_H (0.0f * 1000) + #define obulge_mism_S -6.45f + #define cbulge_mism_H 0.0f + #define cbulge_mism_S -6.45f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + for (a=1; a<=4; a++) + { + if (x+y==5) // other base pair matches! + { + ndH(x,0,y,a)=obulge_match_H; // bulge opening + ndS(x,0,y,a)=obulge_match_S; + ndH(x,a,y,0)=obulge_match_H; + ndS(x,a,y,0)=obulge_match_S; + ndH(0,x,a,y)=cbulge_match_H; // bulge closing + ndS(0,x,a,y)=cbulge_match_S; + ndH(a,x,0,y)=cbulge_match_H; + ndS(a,x,0,y)=cbulge_match_S; + } + else + { // mismatch in other base pair! + ndH(x,0,y,a)=obulge_mism_H; // bulge opening + ndS(x,0,y,a)=obulge_mism_S; + ndH(x,a,y,0)=obulge_mism_H; + ndS(x,a,y,0)=obulge_mism_S; + ndH(0,x,a,y)=cbulge_mism_H; // bulge closing + ndS(0,x,a,y)=cbulge_mism_S; + ndH(a,x,0,y)=cbulge_mism_H; + ndS(a,x,0,y)=cbulge_mism_S; + } + } + + // Watson-Crick pairs (note that only ten are unique, as obviously + // 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc. + ndH(1,1,4,4)=-7.6f*1000; ndS(1,1,4,4)=-21.3f; // AA/TT 04 + ndH(1,2,4,3)=-8.4f*1000; ndS(1,2,4,3)=-22.4f; // AC/TG adapted GT/CA + ndH(1,3,4,2)=-7.8f*1000; ndS(1,3,4,2)=-21.0f; // AG/TC adapted CT/GA + ndH(1,4,4,1)=-7.2f*1000; ndS(1,4,4,1)=-20.4f; // AT/TA 04 + ndH(2,1,3,4)=-8.5f*1000; ndS(2,1,3,4)=-22.7f; // CA/GT 04 + ndH(2,2,3,3)=-8.0f*1000; ndS(2,2,3,3)=-19.9f; // CC/GG adapted GG/CC + ndH(2,3,3,2)=-10.6f*1000; ndS(2,3,3,2)=-27.2f; // CG/GC 04 + ndH(2,4,3,1)=-7.8f*1000; ndS(2,4,3,1)=-21.0f; // CT/GA 04 + ndH(3,1,2,4)=-8.2f*1000; ndS(3,1,2,4)=-22.2f; // GA/CT 04 + ndH(3,2,2,3)=-9.8f*1000; ndS(3,2,2,3)=-24.4f; // GC/CG 04 + ndH(3,3,2,2)=-8.0f*1000; ndS(3,3,2,2)=-19.9f; // GG/CC 04 + ndH(3,4,2,1)=-8.4f*1000; ndS(3,4,2,1)=-22.4f; // GT/CA 04 + ndH(4,1,1,4)=-7.2f*1000; ndS(4,1,1,4)=-21.3f; // TA/AT 04 + ndH(4,2,1,3)=-8.2f*1000; ndS(4,2,1,3)=-22.2f; // TC/AG adapted GA/CT + ndH(4,3,1,2)=-8.5f*1000; ndS(4,3,1,2)=-22.7f; // TG/AC adapted CA/GT + ndH(4,4,1,1)=-7.6f*1000; ndS(4,4,1,1)=-21.3f; // TT/AA adapted AA/TT + + // A-C Mismatches (Values for pH 7.0) + ndH(1,1,2,4)=7.6f*1000; ndS(1,1,2,4)=20.2f; // AA/CT + ndH(1,1,4,2)=2.3f*1000; ndS(1,1,4,2)=4.6f; // AA/TC + ndH(1,2,2,3)=-0.7f*1000; ndS(1,2,2,3)=-3.8f; // AC/CG + ndH(1,2,4,1)=5.3f*1000; ndS(1,2,4,1)=14.6f; // AC/TA + ndH(1,3,2,2)=0.6f*1000; ndS(1,3,2,2)=-0.6f; // AG/CC + ndH(1,4,2,1)=5.3f*1000; ndS(1,4,2,1)=14.6f; // AT/CA + ndH(2,1,1,4)=3.4f*1000; ndS(2,1,1,4)=8.0f; // CA/AT + ndH(2,1,3,2)=1.9f*1000; ndS(2,1,3,2)=3.7f; // CA/GC + ndH(2,2,1,3)=5.2f*1000; ndS(2,2,1,3)=14.2f; // CC/AG + ndH(2,2,3,1)=0.6f*1000; ndS(2,2,3,1)=-0.6f; // CC/GA + ndH(2,3,1,2)=1.9f*1000; ndS(2,3,1,2)=3.7f; // CG/AC + ndH(2,4,1,1)=2.3f*1000; ndS(2,4,1,1)=4.6f; // CT/AA + ndH(3,1,2,2)=5.2f*1000; ndS(3,1,2,2)=14.2f; // GA/CC + ndH(3,2,2,1)=-0.7f*1000; ndS(3,2,2,1)=-3.8f; // GC/CA + ndH(4,1,1,2)=3.4f*1000; ndS(4,1,1,2)=8.0f; // TA/AC + ndH(4,2,1,1)=7.6f*1000; ndS(4,2,1,1)=20.2f; // TC/AA + + // C-T Mismatches + ndH(1,2,4,4)=0.7f*1000; ndS(1,2,4,4)=0.2f; // AC/TT + ndH(1,4,4,2)=-1.2f*1000; ndS(1,4,4,2)=-6.2f; // AT/TC + ndH(2,1,4,4)=1.0f*1000; ndS(2,1,4,4)=0.7f; // CA/TT + ndH(2,2,3,4)=-0.8f*1000; ndS(2,2,3,4)=-4.5f; // CC/GT + ndH(2,2,4,3)=5.2f*1000; ndS(2,2,4,3)=13.5f; // CC/TG + ndH(2,3,4,2)=-1.5f*1000; ndS(2,3,4,2)=-6.1f; // CG/TC + ndH(2,4,3,2)=-1.5f*1000; ndS(2,4,3,2)=-6.1f; // CT/GC + ndH(2,4,4,1)=-1.2f*1000; ndS(2,4,4,1)=-6.2f; // CT/TA + ndH(3,2,2,4)=2.3f*1000; ndS(3,2,2,4)=5.4f; // GC/CT + ndH(3,4,2,2)=5.2f*1000; ndS(3,4,2,2)=13.5f; // GT/CC + ndH(4,1,2,4)=1.2f*1000; ndS(4,1,2,4)=0.7f; // TA/CT + ndH(4,2,2,3)=2.3f*1000; ndS(4,2,2,3)=5.4f; // TC/CG + ndH(4,2,1,4)=1.2f*1000; ndS(4,2,1,4)=0.7f; // TC/AT + ndH(4,3,2,2)=-0.8f*1000; ndS(4,3,2,2)=-4.5f; // TG/CC + ndH(4,4,2,1)=0.7f*1000; ndS(4,4,2,1)=0.2f; // TT/CA + ndH(4,4,1,2)=1.0f*1000; ndS(4,4,1,2)=0.7f; // TT/AC + + // G-A Mismatches + ndH(1,1,3,4)=3.0f*1000; ndS(1,1,3,4)=7.4f; // AA/GT + ndH(1,1,4,3)=-0.6f*1000; ndS(1,1,4,3)=-2.3f; // AA/TG + ndH(1,2,3,3)=0.5f*1000; ndS(1,2,3,3)=3.2f; // AC/GG + ndH(1,3,3,2)=-4.0f*1000; ndS(1,3,3,2)=-13.2f; // AG/GC + ndH(1,3,4,1)=-0.7f*1000; ndS(1,3,4,1)=-2.3f; // AG/TA + ndH(1,4,3,1)=-0.7f*1000; ndS(1,4,3,1)=-2.3f; // AT/GA + ndH(2,1,3,3)=-0.7f*1000; ndS(2,1,3,3)=-2.3f; // CA/GG + ndH(2,3,3,1)=-4.0f*1000; ndS(2,3,3,1)=-13.2f; // CG/GA + ndH(3,1,1,4)=0.7f*1000; ndS(3,1,1,4)=0.7f; // GA/AT + ndH(3,1,2,3)=-0.6f*1000; ndS(3,1,2,3)=-1.0f; // GA/CG + ndH(3,2,1,3)=-0.6f*1000; ndS(3,2,1,3)=-1.0f; // GC/AG + ndH(3,3,1,2)=-0.7f*1000; ndS(3,3,1,2)=-2.3f; // GG/AC + ndH(3,3,2,1)=0.5f*1000; ndS(3,3,2,1)=3.2f; // GG/CA + ndH(3,4,1,1)=-0.6f*1000; ndS(3,4,1,1)=-2.3f; // GT/AA + ndH(4,1,1,3)=0.7f*1000; ndS(4,1,1,3)=0.7f; // TA/AG + ndH(4,3,1,1)=3.0f*1000; ndS(4,3,1,1)=7.4f; // TG/AA + + // G-T Mismatches + ndH(1,3,4,4)=1.0f*1000; ndS(1,3,4,4)=0.9f; // AG/TT + ndH(1,4,4,3)=-2.5f*1000; ndS(1,4,4,3)=-8.3f; // AT/TG + ndH(2,3,3,4)=-4.1f*1000; ndS(2,3,3,4)=-11.7f; // CG/GT + ndH(2,4,3,3)=-2.8f*1000; ndS(2,4,3,3)=-8.0f; // CT/GG + ndH(3,1,4,4)=-1.3f*1000; ndS(3,1,4,4)=-5.3f; // GA/TT + ndH(3,2,4,3)=-4.4f*1000; ndS(3,2,4,3)=-12.3f; // GC/TG + ndH(3,3,2,4)=3.3f*1000; ndS(3,3,2,4)=10.4f; // GG/CT + ndH(3,3,4,2)=-2.8f*1000; ndS(3,3,4,2)=-8.0f; // GG/TC +// ndH(3,3,4,4)=5.8f*1000; ndS(3,3,4,4)=16.3f; // GG/TT + ndH(3,4,2,3)=-4.4f*1000; ndS(3,4,2,3)=-12.3f; // GT/CG + ndH(3,4,4,1)=-2.5f*1000; ndS(3,4,4,1)=-8.3f; // GT/TA +// ndH(3,4,4,3)=4.1f*1000; ndS(3,4,4,3)=9.5f; // GT/TG + ndH(4,1,3,4)=-0.1f*1000; ndS(4,1,3,4)=-1.7f; // TA/GT + ndH(4,2,3,3)=3.3f*1000; ndS(4,2,3,3)=10.4f; // TC/GG + ndH(4,3,1,4)=-0.1f*1000; ndS(4,3,1,4)=-1.7f; // TG/AT + ndH(4,3,3,2)=-4.1f*1000; ndS(4,3,3,2)=-11.7f; // TG/GC +// ndH(4,3,3,4)=-1.4f*1000; ndS(4,3,3,4)=-6.2f; // TG/GT + ndH(4,4,1,3)=-1.3f*1000; ndS(4,4,1,3)=-5.3f; // TT/AG + ndH(4,4,3,1)=1.0f*1000; ndS(4,4,3,1)=0.9f; // TT/GA +// ndH(4,4,3,3)=5.8f*1000; ndS(4,4,3,3)=16.3f; // TT/GG + + // A-A Mismatches + ndH(1,1,1,4)=4.7f*1000; ndS(1,1,1,4)=12.9f; // AA/AT + ndH(1,1,4,1)=1.2f*1000; ndS(1,1,4,1)=1.7f; // AA/TA + ndH(1,2,1,3)=-2.9f*1000; ndS(1,2,1,3)=-9.8f; // AC/AG + ndH(1,3,1,2)=-0.9f*1000; ndS(1,3,1,2)=-4.2f; // AG/AC + ndH(1,4,1,1)=1.2f*1000; ndS(1,4,1,1)=1.7f; // AT/AA + ndH(2,1,3,1)=-0.9f*1000; ndS(2,1,3,1)=-4.2f; // CA/GA + ndH(3,1,2,1)=-2.9f*1000; ndS(3,1,2,1)=-9.8f; // GA/CA + ndH(4,1,1,1)=4.7f*1000; ndS(4,1,1,1)=12.9f; // TA/AA + + // C-C Mismatches + ndH(1,2,4,2)=0.0f*1000; ndS(1,2,4,2)=-4.4f; // AC/TC + ndH(2,1,2,4)=6.1f*1000; ndS(2,1,2,4)=16.4f; // CA/CT + ndH(2,2,2,3)=3.6f*1000; ndS(2,2,2,3)=8.9f; // CC/CG + ndH(2,2,3,2)=-1.5f*1000; ndS(2,2,3,2)=-7.2f; // CC/GC + ndH(2,3,2,2)=-1.5f*1000; ndS(2,3,2,2)=-7.2f; // CG/CC + ndH(2,4,2,1)=0.0f*1000; ndS(2,4,2,1)=-4.4f; // CT/CA + ndH(3,2,2,2)=3.6f*1000; ndS(3,2,2,2)=8.9f; // GC/CC + ndH(4,2,1,2)=6.1f*1000; ndS(4,2,1,2)=16.4f; // TC/AC + + // G-G Mismatches + ndH(1,3,4,3)=-3.1f*1000; ndS(1,3,4,3)=-9.5f; // AG/TG + ndH(2,3,3,3)=-4.9f*1000; ndS(2,3,3,3)=-15.3f; // CG/GG + ndH(3,1,3,4)=1.6f*1000; ndS(3,1,3,4)=3.6f; // GA/GT + ndH(3,2,3,3)=-6.0f*1000; ndS(3,2,3,3)=-15.8f; // GC/GG + ndH(3,3,2,3)=-6.0f*1000; ndS(3,3,2,3)=-15.8f; // GG/CG + ndH(3,3,3,2)=-4.9f*1000; ndS(3,3,3,2)=-15.3f; // GG/GC + ndH(3,4,3,1)=-3.1f*1000; ndS(3,4,3,1)=-9.5f; // GT/GA + ndH(4,3,1,3)=1.6f*1000; ndS(4,3,1,3)=3.6f; // TG/AG + + // T-T Mismatches + ndH(1,4,4,4)=-2.7f*1000; ndS(1,4,4,4)=-10.8f; // AT/TT + ndH(2,4,3,4)=-5.0f*1000; ndS(2,4,3,4)=-15.8f; // CT/GT + ndH(3,4,2,4)=-2.2f*1000; ndS(3,4,2,4)=-8.4f; // GT/CT + ndH(4,1,4,4)=0.2f*1000; ndS(4,1,4,4)=-1.5f; // TA/TT + ndH(4,2,4,3)=-2.2f*1000; ndS(4,2,4,3)=-8.4f; // TC/TG + ndH(4,3,4,2)=-5.0f*1000; ndS(4,3,4,2)=-15.8f; // TG/TC + ndH(4,4,1,4)=0.2f*1000; ndS(4,4,1,4)=-1.5f; // TT/AT + ndH(4,4,4,1)=-2.7f*1000; ndS(4,4,4,1)=-10.8f; // TT/TA + + // Dangling Ends + ndH(5,1,1,4)=-0.7f*1000; ndS(5,1,1,4)=-0.8f; // $A/AT + ndH(5,1,2,4)=4.4f*1000; ndS(5,1,2,4)=14.9f; // $A/CT + ndH(5,1,3,4)=-1.6f*1000; ndS(5,1,3,4)=-3.6f; // $A/GT + ndH(5,1,4,4)=2.9f*1000; ndS(5,1,4,4)=10.4f; // $A/TT + ndH(5,2,1,3)=-2.1f*1000; ndS(5,2,1,3)=-3.9f; // $C/AG + ndH(5,2,2,3)=-0.2f*1000; ndS(5,2,2,3)=-0.1f; // $C/CG + ndH(5,2,3,3)=-3.9f*1000; ndS(5,2,3,3)=-11.2f; // $C/GG + ndH(5,2,4,3)=-4.4f*1000; ndS(5,2,4,3)=-13.1f; // $C/TG + ndH(5,3,1,2)=-5.9f*1000; ndS(5,3,1,2)=-16.5f; // $G/AC + ndH(5,3,2,2)=-2.6f*1000; ndS(5,3,2,2)=-7.4f; // $G/CC + ndH(5,3,3,2)=-3.2f*1000; ndS(5,3,3,2)=-10.4f; // $G/GC + ndH(5,3,4,2)=-5.2f*1000; ndS(5,3,4,2)=-15.0f; // $G/TC + ndH(5,4,1,1)=-0.5f*1000; ndS(5,4,1,1)=-1.1f; // $T/AA + ndH(5,4,2,1)=4.7f*1000; ndS(5,4,2,1)=14.2f; // $T/CA + ndH(5,4,3,1)=-4.1f*1000; ndS(5,4,3,1)=-13.1f; // $T/GA + ndH(5,4,4,1)=-3.8f*1000; ndS(5,4,4,1)=-12.6f; // $T/TA + ndH(1,5,4,1)=-2.9f*1000; ndS(1,5,4,1)=-7.6f; // A$/TA + ndH(1,5,4,2)=-4.1f*1000; ndS(1,5,4,2)=-13.0f; // A$/TC + ndH(1,5,4,3)=-4.2f*1000; ndS(1,5,4,3)=-15.0f; // A$/TG + ndH(1,5,4,4)=-0.2f*1000; ndS(1,5,4,4)=-0.5f; // A$/TT + ndH(1,1,5,4)=0.2f*1000; ndS(1,1,5,4)=2.3f; // AA/$T + ndH(1,1,4,5)=-0.5f*1000; ndS(1,1,4,5)=-1.1f; // AA/T$ + ndH(1,2,5,3)=-6.3f*1000; ndS(1,2,5,3)=-17.1f; // AC/$G + ndH(1,2,4,5)=4.7f*1000; ndS(1,2,4,5)=14.2f; // AC/T$ + ndH(1,3,5,2)=-3.7f*1000; ndS(1,3,5,2)=-10.0f; // AG/$C + ndH(1,3,4,5)=-4.1f*1000; ndS(1,3,4,5)=-13.1f; // AG/T$ + ndH(1,4,5,1)=-2.9f*1000; ndS(1,4,5,1)=-7.6f; // AT/$A + ndH(1,4,4,5)=-3.8f*1000; ndS(1,4,4,5)=-12.6f; // AT/T$ + ndH(2,5,3,1)=-3.7f*1000; ndS(2,5,3,1)=-10.0f; // C$/GA + ndH(2,5,3,2)=-4.0f*1000; ndS(2,5,3,2)=-11.9f; // C$/GC + ndH(2,5,3,3)=-3.9f*1000; ndS(2,5,3,3)=-10.9f; // C$/GG + ndH(2,5,3,4)=-4.9f*1000; ndS(2,5,3,4)=-13.8f; // C$/GT + ndH(2,1,5,4)=0.6f*1000; ndS(2,1,5,4)=3.3f; // CA/$T + ndH(2,1,3,5)=-5.9f*1000; ndS(2,1,3,5)=-16.5f; // CA/G$ + ndH(2,2,5,3)=-4.4f*1000; ndS(2,2,5,3)=-12.6f; // CC/$G + ndH(2,2,3,5)=-2.6f*1000; ndS(2,2,3,5)=-7.4f; // CC/G$ + ndH(2,3,5,2)=-4.0f*1000; ndS(2,3,5,2)=-11.9f; // CG/$C + ndH(2,3,3,5)=-3.2f*1000; ndS(2,3,3,5)=-10.4f; // CG/G$ + ndH(2,4,5,1)=-4.1f*1000; ndS(2,4,5,1)=-13.0f; // CT/$A + ndH(2,4,3,5)=-5.2f*1000; ndS(2,4,3,5)=-15.0f; // CT/G$ + ndH(3,5,2,1)=-6.3f*1000; ndS(3,5,2,1)=-17.1f; // G$/CA + ndH(3,5,2,2)=-4.4f*1000; ndS(3,5,2,2)=-12.6f; // G$/CC + ndH(3,5,2,3)=-5.1f*1000; ndS(3,5,2,3)=-14.0f; // G$/CG + ndH(3,5,2,4)=-4.0f*1000; ndS(3,5,2,4)=-10.9f; // G$/CT + ndH(3,1,5,4)=-1.1f*1000; ndS(3,1,5,4)=-1.6f; // GA/$T + ndH(3,1,2,5)=-2.1f*1000; ndS(3,1,2,5)=-3.9f; // GA/C$ + ndH(3,2,5,3)=-5.1f*1000; ndS(3,2,5,3)=-14.0f; // GC/$G + ndH(3,2,2,5)=-0.2f*1000; ndS(3,2,2,5)=-0.1f; // GC/C$ + ndH(3,3,5,2)=-3.9f*1000; ndS(3,3,5,2)=-10.9f; // GG/$C + ndH(3,3,2,5)=-3.9f*1000; ndS(3,3,2,5)=-11.2f; // GG/C$ + ndH(3,4,5,1)=-4.2f*1000; ndS(3,4,5,1)=-15.0f; // GT/$A + ndH(3,4,2,5)=-4.4f*1000; ndS(3,4,2,5)=-13.1f; // GT/C$ + ndH(4,5,1,1)=0.2f*1000; ndS(4,5,1,1)=2.3f; // T$/AA + ndH(4,5,1,2)=0.6f*1000; ndS(4,5,1,2)=3.3f; // T$/AC + ndH(4,5,1,3)=-1.1f*1000; ndS(4,5,1,3)=-1.6f; // T$/AG + ndH(4,5,1,4)=-6.9f*1000; ndS(4,5,1,4)=-20.0f; // T$/AT + ndH(4,1,5,4)=-6.9f*1000; ndS(4,1,5,4)=-20.0f; // TA/$T + ndH(4,1,1,5)=-0.7f*1000; ndS(4,1,1,5)=-0.7f; // TA/A$ + ndH(4,2,5,3)=-4.0f*1000; ndS(4,2,5,3)=-10.9f; // TC/$G + ndH(4,2,1,5)=4.4f*1000; ndS(4,2,1,5)=14.9f; // TC/A$ + ndH(4,3,5,2)=-4.9f*1000; ndS(4,3,5,2)=-13.8f; // TG/$C + ndH(4,3,1,5)=-1.6f*1000; ndS(4,3,1,5)=-3.6f; // TG/A$ + ndH(4,4,5,1)=-0.2f*1000; ndS(4,4,5,1)=-0.5f; // TT/$A + ndH(4,4,1,5)=2.9f*1000; ndS(4,4,1,5)=10.4f; // TT/A$ + + return; +} + +void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2) +{ + float l1 = strlen(s1); + float l2 = strlen(s2); + if(l1gcContent = nparam_CountGCContent(s1); + nparm->gcContent /= (l1-2); + } else if (l1>l2) { + nparm->gcContent = nparam_CountGCContent(s2); + nparm->gcContent /= (l2-2); + } else { + nparm->gcContent = nparam_CountGCContent(s1)+nparam_CountGCContent(s2); + nparm->gcContent /= (l1+l2-4); + } +} + +float nparam_CountGCContent(char * seq ) { + int lseq = strlen(seq); + int k; + float count = 0; + for( k=0;k T + return 4; + if (mychar==2) // C -> G + return 3; + if (mychar==3) // G -> T + return 2; + if (mychar==4) // T -> A + return 1; + if (mychar==5) // $ -> $ + return 5; + if (!asnum) + { + if (mychar=='A') // A -> T + return 'T'; + if (mychar=='C') // C -> G + return 'G'; + if (mychar=='G') // G -> T + return 'C'; + if (mychar=='T') // T -> A + return 'A'; + if (mychar=='$') // $ -> $ + return '$'; + } + else + { + if (mychar=='A') // A -> T + return 4; + if (mychar=='C') // C -> G + return 3; + if (mychar=='G') // G -> T + return 2; + if (mychar=='T') // T -> A + return 1; + if (mychar=='$') // $ -> $ + return 5; + } + return '*'; +} + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: static bool CNNParams::isMismatch(char,char) +// +// PURPOSE: Return true if char1 and char2 are not watson-crick pair +// +// PARAMETERS: +// char1 - first character +// char2 - second character +// +// RETURN VALUE: +// bool - true if char1,char2 are Watson-Crick, false otw +// +// REVISION HISTORY +// $ 00sep28 : created LK +// #$ + +int nparam_isMismatch(char c1,char c2) +{ + if (c1+c2==5) + return 0; + else + return 1; +} + + + char nparam_convertNum(char c) + { + if (c=='A') + return 1; + if (c=='C') + return 2; + if (c=='G') + return 3; + if (c=='T') + return 4; + if (c=='$') + return 5; + return 0; + } + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::GetEntropy(char,char,char,char) + // + // PURPOSE: Retrieve Entropy for given NN-Pair from parameter table + // + // PARAMETERS: x0,x1,y0,y1: Pairs to look up in form + // 5'-x0-x1-3' / 3'-y0-y1-5' + // + // RETURN VALUE: + // float: Entropy dS for given NN pair + // + // REVISION HISTORY + // $ 00sep06 : created LK + // 00dec29 : included dangling end parameters + // 01feb07 : rewritten. looks ugly now, but is FAST! inline. + // #$ + + float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1) + { + char nx0=nparam_convertNum(x0); + char nx1=nparam_convertNum(x1); + char ny0=nparam_convertNum(y0); + char ny1=nparam_convertNum(y1); + float answer = ndS(nx0,nx1,ny0,ny1); + //Salt correction Santalucia + if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) { + if(nx0!=5 && 1<= nx1 && nx1<=4) { + answer += 0.5*nparm->kfac; + } + if(ny1!=5 && 1<= ny0 && ny0<=4) { + answer += 0.5*nparm->kfac; + } + } + //Salt correction Owczarzy + if (nparm->saltMethod == SALT_METHOD_OWCZARZY) { + float logk = log(nparm->kplus); + answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk); + } + return answer; + } + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::GetEnthalpy(char,char,char,char) + // + // PURPOSE: Retrieve Enthalpy for given NN-Pair from parameter table + // + // PARAMETERS: x0,x1,y0,y1: Pairs to look up in form + // 5'-x0-x1-3' / 3'-y0-y1-5' + // + // RETURN VALUE: + // float: Enthalpy dH for given NN pair + // + // REVISION HISTORY + // $ 00sep06 : created LK + // $ 00dec29 : included dangling end parameters + // $ 01feb07 : rewritten. looks ugly now, but is FAST! inline. + // #$ + + float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1) + { + char nx0=nparam_convertNum(x0); + char nx1=nparam_convertNum(x1); + char ny0=nparam_convertNum(y0); + char ny1=nparam_convertNum(y1); + return ndH(nx0,nx1,ny0,ny1); + } + + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::CalcTM(float entropy,float enthalpy) + // + // PURPOSE: Return melting temperature TM for given entropy and enthalpy + // Assuming a one-state transition and using the formula + // TM = dH / (dS + R ln(Ct/4)) + // entropy = dS + R ln Ct/4 (must already be included!) + // enthaklpy = dH + // where + // dH = enthalpy + // dS = entropy + // R = Boltzmann factor + // Ct = Strand Concentration + // + // PARAMETERS: + // + // RETURN VALUE: + // + // REVISION HISTORY + // $ 00sep06 : created LK + // $ 01jan07 : modified and corrected + // $ 01feb07 : optimized!!! inline + // $ 01feb09 : changed to include r ln ct in entropy!!! + // #$ + + float nparam_CalcTM(float entropy,float enthalpy) + { + float tm = 0; // absolute zero - return if model fails! + if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact)) + return 0; + if (entropy<0) // avoid division by zero and model errors! + { + tm = enthalpy/entropy;// - kfac; //LKFEB + if (tm<0) + return 0; + } + return tm; + } + + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: void CNNParams::AlterTM(float) + // + // PURPOSE: TM can be altered by a new assignment + // + // PARAMETERS: tm_new: new value for TM + // + // RETURN VALUE: + // + // REVISION HISTORY + // + // #$ + + void nparam_AlterTM(PNNParams nparm, float tm_new) + { + nparm->new_TM = tm_new; + return; + } + + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::CalcG(float entropy, float enthalpy) + // + // PURPOSE: return free energy G for given entropy and enthalpy + // Assuming a one-state transition and using the formula + // G = dH - new_TM * dS + // dH = enthalpy + // dS = entropy + // new_TM = value for the optimal melting temperature of + // the last iteration + // + // PARAMETERS: entropy and enthalpy + // + // RETURN VALUE: free Energy value (dG) for the + // + // REVISION HISTORY + // + // #$ + + float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy) + { + float freeEnergy = -999999999; + if (enthalpy>=forbidden_enthalpy) + return -999999999; + if (entropy<0) // avoid division by zero and model errors! + { + entropy = entropy * -1; + enthalpy = enthalpy * -1; + freeEnergy = enthalpy - nparm->new_TM * entropy; + } + return freeEnergy; + } + + + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::CalcSelfTM(char*) + // + // PURPOSE: Calculate TM for given sequence against its complement + // + // PARAMETERS: + // char* - Sequence to consider + // + // RETURN VALUE: + // float - Melting temperature in degrees Kelvin + // + // REVISION HISTORY + // $ 01jan09 LK : created + // $ 01feb07 LK : inline. + // #$ + + float nparam_CalcSelfTM(PNNParams nparm, char* seq) + { + float thedH = 0; + float thedS = nparam_GetInitialEntropy(nparm); + char c1; + char c2; + char c3; + char c4; + unsigned int i; + for ( i=1;i5) + { + if (c3=='A') + c3=1; + if (c3=='C') + c3=2; + if (c3=='G') + c3=3; + if (c3=='T') + c3=4; + if (c3=='$') + c3=5; + } + if (c4>5) + { + if (c4=='A') + c4=1; + if (c4=='C') + c4=2; + if (c4=='G') + c4=3; + if (c4=='T') + c4=4; + if (c4=='$') + c4=5; + } + + thedH += nparam_GetEnthalpy(nparm, c3,c4,c1,c2); + thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2); + } + + return nparam_CalcTM(thedS,thedH); + } + + float nparam_GetInitialEntropy(PNNParams nparm) + { + return -5.9f+nparm->rlogc; + } + + diff --git a/src/libthermo/nnparams.h b/src/libthermo/nnparams.h new file mode 100644 index 0000000..01948c7 --- /dev/null +++ b/src/libthermo/nnparams.h @@ -0,0 +1,104 @@ +/* + * nnparams.h + * PHunterLib + * + * Created by Tiayyba Riaz on 7/2/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//============================================================================= +// Module: nnparams.h +// Project: Diploma Thesis - Probe Selection for DNA Microarrays +// Type: header file - Nearest Neighbor Parameters / Model. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class CNNParams - Nearest Neighbor Model Parameters +// Author: kaderali +// Date: 9-12/2000 +// Copyright: (c) L. Kaderali, 9/2000 - 12/2000 +// +// Revision History +// $ 00sep07 LK : created +// 00dec29 LK : changed to include dangling end data +// 01jan09 LK : included CalcSelfTM function +// 01feb07 LK : optimized +// #$ +//============================================================================= + +#if !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_) +#define AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include +#include + +#ifdef _pack +#pragma pack(1) +#endif + + +// following defines to simplify coding... +#define ndH(a,b,c,d) nparm->dH[a][b][c][d] +#define ndS(a,b,c,d) nparm->dS[a][b][c][d] +#define forbidden_enthalpy 1000000000000000000.0f +//#define forbidden_enthalpy_div1000 1000000000000000.0f +// forbidden entropy=-rlogc +// #define forbidden_entropy 30.205986374220235304486574573422f +// Boltzmann factor (cal/degrees C*mol) +#define R 1.987f +#define SALT_METHOD_SANTALUCIA 1 +#define SALT_METHOD_OWCZARZY 2 +// Strand concentration (assumption!) (M) +// #define Ct 0.000001f +// r*ln(ct/4) as required by many formulas +//#define rlogc -30.205986374220235304486574573422f +extern float forbidden_entropy; + +//----------------------------------------------------------------------------- +// class CNNParams +//typedef class CNNParams* PNNParams; + +//#pragma GCC visibility push(hidden) +typedef struct CNNParams_st +{ +//public: + float Ct1; + float Ct2; + float rlogc; + float kplus; + float kfac; + int saltMethod; + float gcContent; + float new_TM; // ge‰ndert von ML!!! + //CNNParams(); + //virtual ~CNNParams(); +//private: + float dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign) + float dS[6][6][6][6]; +}CNNParams, * PNNParams; + + //void nparam_InitParams(PNNParams nparm, float c1=0.000001f, float c2=0.000001f, float kp=1, int sm = SALT_METHOD_SANTALUCIA ); + void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm); + void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2); + float nparam_CountGCContent(char * seq ); + char nparam_convertNum(char c); + float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1); + float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1); + float nparam_CalcTM(float entropy,float enthalpy); + void nparam_AlterTM(PNNParams nparm, float tm_new); + float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy); + float nparam_CalcSelfTM(PNNParams nparm, char* seq); + float nparam_GetInitialEntropy(PNNParams nparm) ; + char nparam_getComplement(char mychar, int asnum); + int nparam_isMismatch(char,char); + + + +//#pragma GCC visibility pop +#endif // !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_) diff --git a/src/libthermo/thermalign.P b/src/libthermo/thermalign.P new file mode 100644 index 0000000..0b9b4f3 --- /dev/null +++ b/src/libthermo/thermalign.P @@ -0,0 +1,16 @@ +thermalign.o thermalign.P : thermalign.c /usr/include/math.h \ + /usr/include/architecture/i386/math.h /usr/include/sys/cdefs.h \ + /usr/include/assert.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/stdlib.h \ + /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h thermalign.h \ + nnparams.h /usr/include/string.h diff --git a/src/libthermo/thermalign.c b/src/libthermo/thermalign.c new file mode 100644 index 0000000..7dc7826 --- /dev/null +++ b/src/libthermo/thermalign.c @@ -0,0 +1,716 @@ +/* + * thermalign.cpp + * PHunterLib + * + * Created by Tiayyba Riaz on 7/2/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//============================================================================= +// Module: thermalign.cpp +// Project: Diploma Thesis - Probe Selection for DNA Microarrays +// Type: implementation - Thermodynamic Alignment. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class CThermAlign - Thermodynamic Alignment Algorithm +// Author: kaderali +// Date: 9/2000 - 01/2001 +// Copyright: (c) L. Kaderali, 9/2000 - 01/2001 +// +// Revision History +// $ 00sep04 LK : created +// $ 00dec30 LK : changed to do local alignment of probe against +// one entire sequence +// $ 01feb07 LK : optimized! +// $ 01aug06 LK : corrected, included salt and concentration input; +// $ made true local alignment (problem with initial mismatches!) +// #$ +//============================================================================= + +#include +#include +//#include +#include +#include + +#include "thermalign.h" + + + #define dH(a,b,c) (*(thrma->dH+((b)*(thrma->maxseq1len+1)*3+(a)*3+(c)))) + #define dS(a,b,c) (*(thrma->dS+((b)*(thrma->maxseq1len+1)*3+(a)*3+(c)))) + + +/////////////////////////////////////////////////////////////////////////////// +// Construction/Destruction +/////////////////////////////////////////////////////////////////////////////// + +//------------------------ Constructor ---------------------------------------- +// Initialize Thermalign Object. This will allocate memory for the dynamic +// programming matrices for both entropy and enthalpy of the present +// alignment. The max length of the two sequences to align has to be passed +// to the routine. Also, an initialized CNNParams Object must be passed to +// CThermAlign. Object will be initialized only once, sequences can be +// passed to it later. + +//int s1lmax,s2lmax; // global requiered for DEBUGGING + +void therm_initCThermAlign(PThermAlign thrma, int ALength, int BLength, PNNParams myNNParams) { + // Create dynamic programming matrices for entropy and enthalpy. + // The first two dimensions are for the two strings respectively, the + // third dimensions differentiates the alignment according to its end: + // type 0 Alignments will be those that align x(i+1) with y(j+1), + // type 1 Alignments align x(i+1) with a gap + // type 2 Alignments align y(i+1) with a gap + // All three of these types must separately be considered to select the + // optimum alignment in the next iteration. + // The alignment algorithm proceeds analogous to the well known + // Smith-Waterman algorithm for global alignments. + + thrma->dH = (float*)calloc ((ALength+1)*(BLength+1)*3, sizeof (float)); + thrma->dS = (float*)calloc ((ALength+1)*(BLength+1)*3, sizeof (float)); +// s1lmax=ALength; +// s2lmax=BLength; + + if ((thrma->dH==NULL)||(thrma->dS==NULL)) + { + printf("Fatal Error: Out of memory!\n"); + exit(-1); + } + + thrma->NNParams = myNNParams; + thrma->seq1len = ALength; + thrma->maxseq1len = thrma->seq1len; + thrma->seq2len = BLength; + + // set dH / dS entries all to zero! + #ifdef _pack + // this is FAST, but works only if #pragma pack(1) has been set... + memset(dH,0,(ALength+1)*(BLength+1)*3*sizeof(float)); + memset(dS,0,(ALength+1)*(BLength+1)*3*sizeof(float)); + #else + // the following is slower, but works also when padding bytes have + // been inserted in the array for alignment by the compiler... + // time is not really critical here, as this initialization is done + // only once... + int a, b; + for (a=0;a<=ALength;a++) + for (b=0;b<=BLength;b++) + { + dH(a,b,0)=0.0f; + dH(a,b,1)=0.0f; + dH(a,b,2)=0.0f; + dS(a,b,0)=0.0f; + dS(a,b,1)=0.0f; + dS(a,b,2)=0.0f; + } + #endif + therm_InitBorder(thrma); // also need to do only once... +// forbidden_entropy = (-(NNParams->R)*(float)log((NNParams->Ct)/4.0f)); + return; +} + +//------------------------ Destructor ----------------------------------------- +// Free memory + +void therm_finiCThermAlign(PThermAlign thrma) +{ + free (thrma->dH); + free (thrma->dS); + return; +} + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void CThermAlign::InitStrings(char* s1, +// char* s2, +// int s1len, +// int s2len); +// +// PURPOSE: Initialize strings for alignment table! +// +// PARAMETERS: +// s1: Target sequence +// s2: Probe sequence +// s1len: Target sequence length +// s2len: Probe sequence length +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 01jan03 LK : created +// $ 01feb07 LK : removed maximum local stuff +// #$ + +void therm_InitStrings(PThermAlign thrma, char* s1,char* s2,int s1len ,int s2len,int minxpos) +{ + thrma->seq1=s1; thrma->seq1len=s1len; + thrma->seq2=s2; thrma->seq2len=s2len; + nparam_UpdateParams(thrma->NNParams, s1,s2); +/* lk01feb07: removed maxloc-stuff + // if maximum local is not in reused subtable, need to reset! + if ((maxlocj>=minxpos)||(maxloci!=seq1len)) + { + maxloci=0; maxlocj=0; maxloctm=0; + } +*/ +// InitBorder(); //lk01feb07: removed, as it suffices if this is done once in constructor + return; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void CThermAlign::InitBorder(); +// +// PURPOSE: Initialize the lower and left border of the dynamic +// programming table. +// +// PARAMETERS: +// none +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 00sep06 LK : created +// $ 00dec30 LK : complete revision - simply init table now, no +// more saving etc... +// $ 01feb07 LK : removed local alignment stuff and optimized +// #$ + +void therm_InitBorder(PThermAlign thrma) +{ + // Initialization for new alignment! + thrma->maxloctm = 0; // maximum melting temperature found (kelvin) + thrma->maxloci = 0; + thrma->maxlocj = 0; + thrma->maxloct = 0; + int i, j; + + for ( i=0; i<=thrma->seq1len; i++) + { + dH(i,0,0) = 0.0f; dH(i,0,1) = 0.0f; dH(i,0,2) = 0.0f; + dS(i,0,0) = nparam_GetInitialEntropy(thrma->NNParams); dS(i,0,1) = nparam_GetInitialEntropy(thrma->NNParams); + dS(i,0,2) = nparam_GetInitialEntropy(thrma->NNParams); + } + for ( j=1; j<=thrma->seq2len; j++) + { + dH(0,j,0) = 0.0f; dH(0,j,1) = 0.0f; dH(0,j,2) = 0.0f; + dS(0,j,0) = nparam_GetInitialEntropy(thrma->NNParams); dS(0,j,1) = nparam_GetInitialEntropy(thrma->NNParams); + dS(0,j,2) = nparam_GetInitialEntropy(thrma->NNParams); + } + + return; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void CThermAlign::CalculateCell(int i, int j); +// +// PURPOSE: Calculate entropy and enthalpy for a given cell using a variant +// of the dynamic programming recursion, choosing greedy for +// tm->max +// x(i,j,0) = min/max {x(i-1,j-1,t) | for t in {0,1,2}} + newnn +// x(i,j,1) = min/max {x(i-1,j-1,0) + newnn, x(i-1,j,1)} +// x(i,j,2) = min/max {x(i-1,j-1,0) + newnn, x(i, j-1,2)} +// where newnn is the contribution of the newly added nearest +// neighbor term. The underlying assumption is that two +// consecutive gaps do not contribute to x. (Can however be +// easily changed, as long as an affine relationship exists +// between the length of the gap and the associated "cost"). +// To estimate the melting temperature TM from Enthalpy and +// Entropy, the following formula is being used: +// TM = dH / (dS + R ln CT) +// where +// TM is the temperature at which 50% of the duplex +// are dissociated +// dH is the Enthalpy +// dS is the Entropy +// R is the gas constant +// CT is the concentration (assumed constant) +// and ln means the natural logarithm. +// One drawback is that the two separate computations of +// Entropy and Enthalpy may result in different alignments. +// We do not want that, and thus pick the one with the higher +// melting temperature TM. However, it must be warned that +// depending on the underlying nearest neighbor parameters this +// may only approximate the "true" optimum result! +// +// PARAMETERS: +// int i, +// int j - coordinates of the dp cell to calculate (=position in +// string). Requieres that (i-1,j), (i,j-1), (i-1,j-1) +// are known. +// +// RETURN VALUE: +// - will be written directly to dH/dS array in this object +// +// REVISION HISTORY +// $ 00sep06 LK : created +// $ 00dec30 LK : modified to align probe with full target (local +// alignment) +// $ 01feb07 LK : optimized and removed local alignment stuff +// $ 01feb08 LK : optimized further and merged into CalculateTable +// #$ +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: CThermAlign::CalculateTable(); +// +// PURPOSE: Compute the entire dynamic programming table cell by cell. +// Requieres that InitBorder has been called. +// Computations will be done in the usual rowwise manner. +// +// PARAMETERS: +// int startRow: Row that computation shall commence at! +// +// RETURN VALUE: +// void - Data will be written directly to dH and dS in this object +// +// REVISION HISTORY +// $ 00sep06 : created LK +// 00dec31 : modified to begin at a specified row! +// #$ + +void therm_CalculateTable(PThermAlign thrma, int startRow) +{ +// #define forbidden_enthalpy 1000000000000000000.0f + register int i; + register int iminusone; + register int j; + register int jminusone; + // Determine previous character! + register char prevchari; + register char prevcharj; + register char seq1char; + register char seq2char; + register float entropy1; + register float entropy2; + register float entropy3; + register float enthalpy1; + register float enthalpy2; + register float enthalpy3; + register float tm1; + register float tm2; + register float tm3; + float maxtm; +// ofstream dboutstream("debug.out"); +// assert(startRow>0); + for (j=startRow; j<=thrma->seq2len; j++) + { + if (j>1) + prevcharj = thrma->seq2[j-2]; + else + prevcharj = 0; + jminusone = j - 1; + seq2char = thrma->seq2[jminusone]; + for (i=1; i<=thrma->seq1len; i++) + { + // local variables + if (i>1) + prevchari = thrma->seq1[i-2]; + else + prevchari = 0; + iminusone = i - 1; + seq1char=thrma->seq1[iminusone]; + + // --------------- Upper cell: Alignment of seq1[i] with seg2[j] + entropy1 = dS(iminusone,jminusone,0); + enthalpy1 = dH(iminusone,jminusone,0) ; + entropy2 = dS(iminusone,jminusone,1); + enthalpy2 = dH(iminusone,jminusone,1); + entropy3 = dS(iminusone,jminusone,2); + enthalpy3 = dH(iminusone,jminusone,2); + if (enthalpy1NNParams, prevchari, seq1char, prevcharj, seq2char); + enthalpy1 += nparam_GetEnthalpy(thrma->NNParams, prevchari, seq1char, prevcharj, seq2char); + } + if (enthalpy2NNParams, prevchari, seq1char, 0, seq2char); // 0 is gap! + entropy2 += nparam_GetEntropy(thrma->NNParams, prevchari, seq1char, 0, seq2char); + } + if (enthalpy3NNParams, 0, seq1char, prevcharj, seq2char); + entropy3 += nparam_GetEntropy(thrma->NNParams, 0, seq1char, prevcharj, seq2char); + } + // choose optimum combination + tm1 = nparam_CalcTM(entropy1, enthalpy1); + tm2 = nparam_CalcTM(entropy2, enthalpy2); + tm3 = nparam_CalcTM(entropy3, enthalpy3); + if ((tm1>=tm2)&&(tm1>=tm3)) + { + dS(i,j,0) = entropy1; + dH(i,j,0) = enthalpy1; + maxtm=tm1; + } + else if (tm2>=tm3) + { + dS(i,j,0) = entropy2; + dH(i,j,0) = enthalpy2; + maxtm=tm2; + } + else + { + dS(i,j,0) = entropy3; + dH(i,j,0) = enthalpy3; + maxtm=tm3; + } + // set to zero if alignment so far is mismatches only (tm will be zero -> no need to reset maxtm)! + if (dH(i,j,0)==0) + dS(i,j,0)=nparam_GetInitialEntropy(thrma->NNParams); + // check if local maximum found! + if (((i==thrma->seq1len)||(j==thrma->seq2len))&&(maxtm>=thrma->maxloctm)) + { + thrma->maxloctm=maxtm; + thrma->maxloci=i; + thrma->maxlocj=j; + thrma->maxloct=0; + } + // set to zero if alignment so far is mismatches only! + if (dH(i,j,0)==0) + dS(i,j,0)=nparam_GetInitialEntropy(thrma->NNParams); + + // --------------- Middle cell: Alignment of seq1[i] with '-' + // following changes lk 01jan08 + // we do not allow $- in the beginning, therefore set to forbidden if + // j=1 + if (j==1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + // also, disallow -$ in the end, therefore forbid if j=seq2len-1 + else if (j==thrma->seq2len-1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + else + { + // end lk01jan08 + // -- entropy + entropy1 = dS(iminusone,j,0); + entropy2 = dS(iminusone,j,1); + enthalpy1 = dH(iminusone,j,0); + enthalpy2 = dH(iminusone,j,1); + if (enthalpy1NNParams, prevchari, seq1char, seq2char, 0); + enthalpy1 += nparam_GetEnthalpy(thrma->NNParams, prevchari, seq1char, seq2char, 0); + } + if (enthalpy2NNParams, prevchari, seq1char, 0, 0); + enthalpy2 += nparam_GetEnthalpy(thrma->NNParams, prevchari, seq1char, 0, 0); + } + } + // -- choose optimum combination + tm1 = nparam_CalcTM(entropy1, enthalpy1); + tm2 = nparam_CalcTM(entropy2, enthalpy2); + if (tm1>=tm2) + { + dS(i,j,1)=entropy1; + dH(i,j,1)=enthalpy1; + maxtm=tm1; + } + else + { + dS(i,j,1)=entropy2; + dH(i,j,1)=enthalpy2; + maxtm=tm2; + } + // check if local maximum found! + if (((i==thrma->seq1len)||(j==thrma->seq2len))&&(maxtm>=thrma->maxloctm)) + { + thrma->maxloctm=maxtm; + thrma->maxloci=i; + thrma->maxlocj=j; + thrma->maxloct=1; + } + // set to zero if alignment so far is mismatches only! + if (dH(i,j,1)==0) + dS(i,j,1)=nparam_GetInitialEntropy(thrma->NNParams); + + // --------------- Lower cell: Alignment of '-' with seq2[j] + // following changes lk 01jan08 + // we do not allow $- in the beginning, therefore set to forbidden if + // i=1 + if (i==1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + // also, disallow -$ in the end, therefore forbid if i=seq1len-1 + else if (i==thrma->seq1len-1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + else + { + // end lk01jan08 + entropy1 = dS(i,jminusone,0); + entropy2 = dS(i,jminusone,2); + enthalpy1 = dH(i,jminusone,0); + enthalpy2 = dH(i,jminusone,2); + if (enthalpy1NNParams, seq1char, 0, prevcharj, seq2char); + enthalpy1 += nparam_GetEnthalpy(thrma->NNParams, seq1char, 0, prevcharj, seq2char); + } + if (enthalpy2NNParams, 0, 0, prevcharj, seq2char); + enthalpy2 += nparam_GetEnthalpy(thrma->NNParams, 0, 0, prevcharj, seq2char); + } + } + // -- choose optimum combination + tm1 = nparam_CalcTM(entropy1, enthalpy1); + tm2 = nparam_CalcTM(entropy2, enthalpy2); + if (tm1>=tm2) + { + dS(i,j,2)=entropy1; + dH(i,j,2)=enthalpy1; + maxtm=tm1; + } + else + { + dS(i,j,2)=entropy2; + dH(i,j,2)=enthalpy2; + maxtm=tm2; + } + // check if local maximum found! + if (((i==thrma->seq1len)||(j==thrma->seq2len))&&(maxtm>=thrma->maxloctm)) + { + thrma->maxloctm=maxtm; + thrma->maxloci=i; + thrma->maxlocj=j; + thrma->maxloct=2; + } + // set to zero if alignment so far is mismatches only! + if (dH(i,j,2)==0) + dS(i,j,2)=nparam_GetInitialEntropy(thrma->NNParams); + + + /* + dboutstream<<"After cell ("<=tm1)&&(tm0>=tm2)) + entropy = dS(i,j,0); + else if ((tm1>=tm0)&&(tm1>=tm2)) + entropy = dS(i,j,1); + else + entropy = dS(i,j,2); + + return entropy; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetEnthalpy(int i, int j); +// +// PURPOSE: Return Enthalpy for given cell +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Enthalpy (dS) +// +// REVISION HISTORY +// $ 00sep23 : created LK +// #$ + +float therm_GetEnthalpy(PThermAlign thrma, int i, int j) +{ + float enthalpy; + + // first, need to determine optimum values for dG and dH + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(dS(i,j,0),dH(i,j,0)); + tm1 = nparam_CalcTM(dS(i,j,1),dH(i,j,1)); + tm2 = nparam_CalcTM(dS(i,j,2),dH(i,j,2)); + if ((tm0>=tm1)&&(tm0>=tm2)) + enthalpy = dH(i,j,0); + else if ((tm1>=tm0)&&(tm1>=tm2)) + enthalpy = dH(i,j,1); + else + enthalpy = dH(i,j,2); + + return enthalpy; +} + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergy(int i, int j); +// +// PURPOSE: Return free Energy for given cell at melting temp +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - free Energy value (dG) at melting temperature +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetFreeEnergy(PThermAlign thrma, int i, int j) +{ + return therm_GetFreeEnergyK(thrma, i,j,therm_GetMeltingTempK(thrma, i,j)); +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergyK(int i, int j, float t); +// +// PURPOSE: Return free Energy for given cell at given temperature +// +// PARAMETERS: +// i, j - Cell coordinates +// t - Temperature in Kelvin +// +// RETURN VALUE: +// float - free Energy value (dG) at given temperature (Kelvin) +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetFreeEnergyK(PThermAlign thrma, int i, int j, float t) +{ + // dG = dH - T * dS + + float entropy; + float enthalpy; + + // first, need to determine optimum values for dG and dH + entropy = therm_GetEntropy(thrma, i,j); + enthalpy = therm_GetEnthalpy(thrma, i,j); + + return enthalpy - t * entropy; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergyC(int i, int j, int t); +// +// PURPOSE: Return free Energy for given cell at given temperature +// +// PARAMETERS: +// i, j - Cell coordinates +// t - Temperature in Celsius +// +// RETURN VALUE: +// float - free Energy value (dG) at melting temperature +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetFreeEnergyC(PThermAlign thrma, int i, int j, float t) +{ + // dG = dH - T * dS + + float entropy; + float enthalpy; + + // first, need to determine optimum values for dG and dH + entropy = therm_GetEntropy(thrma, i,j); + enthalpy = therm_GetEnthalpy(thrma, i,j); + + return enthalpy - (t + 273.15f) * entropy; +} + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float CThermAlign::GetMeltingTempC(int i, int j) +// +// PURPOSE: Calculate TM for given cell. Return in Celsius. +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Melting Temperature TM +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetMeltingTempC(PThermAlign thrma, int i, int j) +{ + return therm_GetMeltingTempK(thrma, i,j) - 273.15f; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float CThermAlign::GetMeltingTempK(int i, int j) +// +// PURPOSE: Calculate TM for given cell. Return in Kelvin. +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Melting Temperature TM +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetMeltingTempK(PThermAlign thrma, int i, int j) +{ + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(dS(i,j,0), dH(i,j,0)); + tm1 = nparam_CalcTM(dS(i,j,1), dH(i,j,1)); + tm2 = nparam_CalcTM(dS(i,j,2), dH(i,j,2)); + float maxtm; + if ((tm0>=tm1)&&(tm0>=tm2)) + maxtm=tm0; + else if ((tm1>=tm0)&&(tm1>=tm2)) + maxtm=tm1; + else + maxtm=tm2; + if (maxtm<0) + maxtm=0; + return maxtm; +} + diff --git a/src/libthermo/thermalign.h b/src/libthermo/thermalign.h new file mode 100644 index 0000000..65f368a --- /dev/null +++ b/src/libthermo/thermalign.h @@ -0,0 +1,105 @@ +//============================================================================= +// Module: thermalign.h +// Project: Diploma Thesis - Probe Selection for DNA Microarrays +// Type: header file - Thermodynamic Alignment. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class CThermAlign - Thermodynamic Alignment Algorithm +// Author: kaderali +// Date: 9/2000 - 12/2000 +// Copyright: (c) L. Kaderali, 9/2000 - 12/2000 +// +// Revision History +// $ 00sep04 LK : created +// $ 00dec30 LK : changed to do local alignment of probe against +// one entire sequence +// $ 01feb07 LK : optimized +// #$ +//============================================================================= + +#if !defined(AFX_THERMALIGN_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_) +#define AFX_THERMALIGN_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_ + +//#if _MSC_VER > 1000 +//#pragma once +//#endif // _MSC_VER > 1000 + +//#include +//#include +//#include +#include "nnparams.h" // Nearest Neighbor Parameters +//#include "../gsfxtree/gsfxtree.h" // Suffix Tree Stuff + +//using namespace std; + +//#ifdef _pack +//#pragma pack(1) +//#endif + +//----------------------------------------------------------------------------- +// class CThermAlign +//#pragma GCC visibility push(hidden) +//typedef class CThermAlign* PThermAlign; + +typedef struct CThermAlign_t +{ +//public: + #ifdef _output_alignment +/* void printAlignment(ostream &outputStream); + void OutputAlignment(ostream &outputStream); + bool OutputAlignment(ostream &outputStream, int, int, int, bool local=false); + void OutputLocalAlignment(ostream &outputStream); + void PrintDPTable(ostream&);*/ + #endif +// lk01feb07: removed maxlocstuff as not requiered by thermtreealign... + float maxloctm; // maximum local temperature found + int maxloci; // i position thereof + int maxlocj; // j position thereof + int maxloct; // and type of maximum alignment! + int targetNumber; // id number of target sequence +// PGSfxLeaf probeNode; // identifier for probe sequence + char *seq1; // Sequence 1 + char *seq2; // Sequence 2 + int seq1len; // Length of Sequence 1 + int seq2len; // Length of Sequence 2 + int maxseq1len; // length of longest target... +//private: + float *dH; // Dynamic Programming Table for Entropy + float *dS; // Dynamic Programming Table for Enthalpy + PNNParams NNParams; // Nearest Neighbor parameters +// float forbidden_entropy; +#ifdef _output_alignment + //ostrstream *s1aptr, *s2aptr, *atypptr; +// Used to buffer aligned sequences (on output) + //ostrstream s1align; + //ostrstream s2align; + //ostrstream aligntype; // insert, deletion, match, unmatch (for output) + // same for local alignment: + /* removed lk00jan08: use global instead! + ostrstream ls1align; + ostrstream ls2align; + ostrstream laligntype; // insert, deletion, match, unmatch (for output) + */ +#endif +}CThermAlign, * PThermAlign; +//#pragma GCC visibility pop + +float therm_GetEntropy(PThermAlign thrma, int, int); +float therm_GetEnthalpy(PThermAlign thrma, int, int); +float therm_GetFreeEnergy(PThermAlign thrma, int, int); +float therm_GetFreeEnergyK(PThermAlign thrma, int, int, float); +float therm_GetFreeEnergyC(PThermAlign thrma, int, int, float); +float therm_GetMeltingTempC(PThermAlign thrma, int, int); +float therm_GetMeltingTempK(PThermAlign thrma, int, int); +void therm_initCThermAlign(PThermAlign thrma, int, int, PNNParams); +void therm_finiCThermAlign(PThermAlign thrma); +//void therm_InitStrings(PThermAlign thrma, char*,char*,int,int,int=0); +void therm_InitStrings(PThermAlign thrma, char*,char*,int,int,int); +void therm_InitBorder(PThermAlign thrma); +//void therm_CalculateTable(PThermAlign thrma, int=1); +void therm_CalculateTable(PThermAlign thrma, int); + + +#endif // !defined(AFX_THERMALIGN_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_)