From f142d0e904126c4a15bc4770250d99f04dcbe170 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 7 Jul 2009 12:35:17 +0000 Subject: [PATCH] Added thermodynamics properties git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@221 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- src/Makefile | 9 +- src/ecoprimer.c | 87 +++- src/global.mk | 2 +- src/libthermo/Makefile | 26 ++ src/libthermo/dinkelbach.P | 5 + src/libthermo/dinkelbach.c | 76 ++++ src/libthermo/dinkelbach.h | 36 ++ src/libthermo/galign.P | 16 + src/libthermo/galign.c | 685 +++++++++++++++++++++++++++++++ src/libthermo/galign.h | 97 +++++ src/libthermo/libthermo.P | 15 + src/libthermo/libthermo.a | Bin 0 -> 37528 bytes src/libthermo/libthermo.c | 190 +++++++++ src/libthermo/libthermo.h | 75 ++++ src/libthermo/nnparams.P | 5 + src/libthermo/nnparams.c | 812 +++++++++++++++++++++++++++++++++++++ src/libthermo/nnparams.h | 104 +++++ src/libthermo/thermalign.P | 16 + src/libthermo/thermalign.c | 716 ++++++++++++++++++++++++++++++++ src/libthermo/thermalign.h | 105 +++++ 20 files changed, 3069 insertions(+), 8 deletions(-) create mode 100644 src/libthermo/Makefile create mode 100644 src/libthermo/dinkelbach.P create mode 100644 src/libthermo/dinkelbach.c create mode 100644 src/libthermo/dinkelbach.h create mode 100644 src/libthermo/galign.P create mode 100644 src/libthermo/galign.c create mode 100644 src/libthermo/galign.h create mode 100644 src/libthermo/libthermo.P create mode 100644 src/libthermo/libthermo.a create mode 100644 src/libthermo/libthermo.c create mode 100644 src/libthermo/libthermo.h create mode 100644 src/libthermo/nnparams.P create mode 100644 src/libthermo/nnparams.c create mode 100644 src/libthermo/nnparams.h create mode 100644 src/libthermo/thermalign.P create mode 100644 src/libthermo/thermalign.c create mode 100644 src/libthermo/thermalign.h diff --git a/src/Makefile b/src/Makefile index a8011c7..4f75583 100644 --- a/src/Makefile +++ b/src/Makefile @@ -6,10 +6,12 @@ PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC)) SRCS= $(PRIMER_SRC) -LIB= -lecoprimer -lecoPCR -lz -lm +LIB= -lecoprimer -lecoPCR -lthermo -lz -lm LIBFILE= libecoPCR/libecoPCR.a \ - libecoprimer/libecoprimer.a + libecoprimer/libecoprimer.a \ + libthermo/libthermo.a \ + include global.mk @@ -41,6 +43,8 @@ libecoPCR/libecoPCR.a: libecoprimer/libecoprimer.a: $(MAKE) -C libecoprimer +libthermo/libthermo.a: + $(MAKE) -C libthermo ######## # @@ -53,6 +57,7 @@ clean: rm -f $(EXEC) $(MAKE) -C libecoPCR clean $(MAKE) -C libecoprimer clean + $(MAKE) -C libthermo clean diff --git a/src/ecoprimer.c b/src/ecoprimer.c index 3c135fb..3f481b0 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -6,6 +6,7 @@ */ #include "libecoprimer/ecoprimer.h" +#include "libthermo/libthermo.h" #include #include #include @@ -13,14 +14,48 @@ #include #include #include +#include #define VERSION "0.3" /* TR: by default, statistics are made on species level*/ #define DEFAULTTAXONRANK "species" static int cmpprintedpairs(const void* p1,const void* p2); +//float _Z27calculateMeltingTemperature_ (char * seq1, char * seq2); +void* lib_handle = NULL; +float (*calcMelTemp)(char*, char*); +void openlibman () +{ + // Open the library. + char* lib_name = "./libPHunterLib.dylib"; + lib_handle = dlopen(lib_name, RTLD_NOW); + if (lib_handle) { + fprintf(stderr, "[%s] dlopen(\"%s\", RTLD_NOW): Successful\n", __FILE__, lib_name); + } + else { + fprintf(stderr, "[%s] Unable to open library: %s\n", + __FILE__, dlerror()); + exit(EXIT_FAILURE); + } + + // Get the symbol addresses. + calcMelTemp = dlsym(lib_handle, "_Z27calculateMeltingTemperaturePcS_"); + if (calcMelTemp) { + fprintf(stderr, "[%s] dlsym(lib_handle, \"addRating\"): Successful\n", __FILE__); + } + else { + fprintf(stderr, "[%s] Unable to get symbol: %s\n", + __FILE__, dlerror()); + exit(EXIT_FAILURE); + } +} + +void closlibman () +{ + if (lib_handle) dlclose(lib_handle); +} /* ----------------------------------------------- */ /* printout help */ /* ----------------------------------------------- */ @@ -162,8 +197,11 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) bool_t goodtmp; bool_t strand; uint32_t i; + float temp; char *c; + char p1[32]; + char p2[32]; if (!asdirect1) w1=ecoComplementWord(w1,options->primer_length); @@ -189,8 +227,41 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) printf("%6d\t",index); - printf("%s\t",ecoUnhashWord(w1,options->primer_length)); - printf("%s",ecoUnhashWord(w2,options->primer_length)); + c = ecoUnhashWord(w1,options->primer_length); + strcpy (p1, c); + c = ecoUnhashWord(w2,options->primer_length); + strcpy (p2, c); + + printf("%s\t", p1); + printf("%s", p2); + + /*For thermo: first word should be 5'-3'*/ + /*if (!asdirect1) + { + wtmp = ecoComplementWord(w1,options->primer_length); + c = ecoUnhashWord(wtmp,options->primer_length); + strcpy (p1, c); + }*/ + /*For thermo: first word should be 5'-3'*/ + /*if (asdirect2) + { + wtmp = ecoComplementWord(w2,options->primer_length); + c = ecoUnhashWord(wtmp,options->primer_length); + strcpy (p2, c); + }*/ + + /*temp = calculateMeltingTemperature (p1, p2);*/ + //float temp = calculateMeltingTemperature ("CTGTTTACCAAAAACATC", "GGTCTGAACTCAGATCAC"); + + temp = calculateMeltingTemperatureBasic(p1); + printf ("\t%4.3f", temp); + + temp = calculateMeltingTemperatureBasic(p2); + printf ("\t%4.3f", temp); + + printf ("\t%d",countGCContent(p1)); + + printf ("\t%d",countGCContent(p2)); printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]); @@ -401,6 +472,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) for (i=0;i < count;i++) printapair(i,sortedpairs[i],options); + } @@ -479,7 +551,12 @@ int main(int argc, char **argv) //printcurrenttime(); //return 0; - + //openlibman (); + //float temp = calculateMeltingTemperature ("GGTCTGAACTCAGATCAC", "CTGTTTACCAAAAACATC"); + //float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC"); + //printf ("temp = %f\n", temp); + //return 0; + initoptions(&options); while ((carg = getopt(argc, argv, "hfvcUDSd:l:L:e:i:r:R:q:3:s:x:t:O:")) != -1) { @@ -715,11 +792,11 @@ int main(int argc, char **argv) /*TR: Added*/ pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options); - + //fprintf(stderr,"buildPrimerPairs finished\n"); // setoktaxforspecificity (&pairs); printpairs (pairs, &options,taxonomy); - + // closlibman (); //ECOFREE(pairs.pairs,"Free pairs table"); diff --git a/src/global.mk b/src/global.mk index 85b8672..a7bcfef 100644 --- a/src/global.mk +++ b/src/global.mk @@ -1,5 +1,5 @@ MACHINE=MAC_OS_X -LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer +LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer -Llibthermo MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $< CC=gcc diff --git a/src/libthermo/Makefile b/src/libthermo/Makefile new file mode 100644 index 0000000..fefcbc7 --- /dev/null +++ b/src/libthermo/Makefile @@ -0,0 +1,26 @@ + +SOURCES = dinkelbach.c \ + galign.c \ + libthermo.c \ + nnparams.c \ + thermalign.c + +SRCS=$(SOURCES) + +OBJECTS= $(patsubst %.c,%.o,$(SOURCES)) + +LIBFILE= libthermo.a +RANLIB= ranlib + + +include ../global.mk + + +all: $(LIBFILE) + +clean: + rm -rf $(OBJECTS) $(LIBFILE) + +$(LIBFILE): $(OBJECTS) + ar -cr $@ $? + $(RANLIB) $@ diff --git a/src/libthermo/dinkelbach.P b/src/libthermo/dinkelbach.P new file mode 100644 index 0000000..179db99 --- /dev/null +++ b/src/libthermo/dinkelbach.P @@ -0,0 +1,5 @@ +dinkelbach.o dinkelbach.P : dinkelbach.c nnparams.h /usr/include/math.h \ + /usr/include/architecture/i386/math.h /usr/include/sys/cdefs.h \ + /usr/include/string.h /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h galign.h \ + dinkelbach.h diff --git a/src/libthermo/dinkelbach.c b/src/libthermo/dinkelbach.c new file mode 100644 index 0000000..9bacffc --- /dev/null +++ b/src/libthermo/dinkelbach.c @@ -0,0 +1,76 @@ +/* + * dinkelbach.cpp + * PHunterLib + * + * Created by Tiayyba Riaz on 6/7/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//#include +//#include +//#include +#include "nnparams.h" +#include "galign.h" +#include "dinkelbach.h" + +void dnkl_initdinkelbach(pdinkelbach dnkl, PNNParams myDiParams, GGAlign mGAlign) +{ + dnkl->myDinkParams = myDiParams; + dnkl->myGAlign = mGAlign; +} + + +int dnkl_iteration(pdinkelbach dnkl, float TempK) +{ + int iteration = 0; + nparam_AlterTM(dnkl->myDinkParams, TempK); + float precursor_oldTM = TempK; + float newTM = TempK; + float oldTM = TempK, lambda, oldlambda; + lambda = 999999999999999999.9; + int positive=0; + + do + { + iteration++; + galn_InitBorder(dnkl->myGAlign); + galn_CalculateTable(dnkl->myGAlign, 1); + oldlambda=lambda; + lambda =galn_GetFreeEnergyK(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj,newTM); + if(lambda < 0.0) + { + if(!positive) + { + //Initial melting temperature greater than expected + newTM = galn_GetEnthalpy(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj)/galn_GetEntropy(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj)-1; + lambda = oldlambda; + nparam_AlterTM(dnkl->myDinkParams, newTM); + } + else + { + precursor_oldTM = oldTM; + } + } + else + { + positive = 1; + oldTM = newTM; + newTM = galn_GetMeltingTempK(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj); + nparam_AlterTM(dnkl->myDinkParams, newTM); + } + } + while(!positive || (newTM - oldTM > 0.001 && (lambda>0.0)&&(lambda 0.001 && ((lambda<0.0)||(lambda>oldlambda))) + { + iteration++; + nparam_AlterTM(dnkl->myDinkParams, precursor_oldTM); + galn_InitBorder(dnkl->myGAlign); + galn_CalculateTable(dnkl->myGAlign, 1); + lambda = galn_GetFreeEnergyK(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj,precursor_oldTM); + newTM = galn_GetMeltingTempK(dnkl->myGAlign, dnkl->myGAlign->maxloci,dnkl->myGAlign->maxlocj); + nparam_AlterTM(dnkl->myDinkParams, newTM); + } + return iteration; +} diff --git a/src/libthermo/dinkelbach.h b/src/libthermo/dinkelbach.h new file mode 100644 index 0000000..c90c8c1 --- /dev/null +++ b/src/libthermo/dinkelbach.h @@ -0,0 +1,36 @@ +/* + * dinkelbach.h + * PHunterLib + * + * Created by Tiayyba Riaz on 7/2/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//Module: Achievement of the Dinkelbach algorithm + +#ifndef __dinkelbach_h +#define __dinkelbach_h + +//#include +//#include +//#include +#include "nnparams.h" +#include "galign.h" + +//using namespace std; + +//typedef class dinkelbach* DINKEL; +//#pragma GCC visibility push(hidden) +typedef struct dinkelbach_st +{ + PNNParams myDinkParams; + GGAlign myGAlign; + +}dinkelbach, * pdinkelbach; +//#pragma GCC visibility pop + +void dnkl_initdinkelbach(pdinkelbach dnkl, PNNParams, GGAlign); +int dnkl_iteration(pdinkelbach dnkl, float TempK); + +#endif /* __dinkelbach_h */ diff --git a/src/libthermo/galign.P b/src/libthermo/galign.P new file mode 100644 index 0000000..039486a --- /dev/null +++ b/src/libthermo/galign.P @@ -0,0 +1,16 @@ +galign.o galign.P : galign.c /usr/include/math.h \ + /usr/include/architecture/i386/math.h /usr/include/sys/cdefs.h \ + /usr/include/assert.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/stdlib.h \ + /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h galign.h \ + nnparams.h /usr/include/string.h diff --git a/src/libthermo/galign.c b/src/libthermo/galign.c new file mode 100644 index 0000000..0f96688 --- /dev/null +++ b/src/libthermo/galign.c @@ -0,0 +1,685 @@ +/* + * galign.cpp + * PHunterLib + * + * Created by Tiayyba Riaz on 6/7/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//============================================================================= +// Module: galign.cpp +// Project: Cubic Project - Calculation of melting temperature and free +// energy of two DNA strands +// Type: implementation - Thermodynamic Alignment. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class GAlign - Thermodynamic Alignment Algorithm +// Author: leber +// Date: 01/2002 - 02/2002 +// Copyright: (c) L. Kaderali & M. Leber, 01/2002 - 02/2002 +// +// Revision History +// $ 00sep04 LK : created +// $ 00dec30 LK : changed to do local alignment of probe against +// one entire sequence +// $ 01feb07 LK : optimized! +// $ 01aug06 LK : corrected, included salt and concentration input; +// $ made true local alignment (problem with initial mismatches!) +// #$ +//============================================================================= + +#include +#include +#include +#include + +#include "galign.h" + + +#define nndH(a,b,c) (*(galn->dH+((b)*(galn->maxseq1len+1)*3+(a)*3+(c)))) +#define nndS(a,b,c) (*(galn->dS+((b)*(galn->maxseq1len+1)*3+(a)*3+(c)))) + + + +/////////////////////////////////////////////////////////////////////////////// +// Construction/Destruction +/////////////////////////////////////////////////////////////////////////////// + +//------------------------ Constructor ---------------------------------------- +// Initialize Thermalign Object. This will allocate memory for the dynamic +// programming matrices for both entropy and enthalpy of the present +// alignment. The max length of the two sequences to align has to be passed +// to the routine. Also, an initialized CNNParams Object must be passed to +// CThermAlign. Object will be initialized only once, sequences can be +// passed to it later. + +//int s1lmax,s2lmax; // global requiered for DEBUGGING + +void galn_initGAlign(GGAlign galn, int ALength, int BLength, PNNParams myNNParams) +{ + // Create dynamic programming matrices for entropy and enthalpy. + // The first two dimensions are for the two strings respectively, the + // third dimensions differentiates the alignment according to its end: + // type 0 Alignments will be those that align x(i+1) with y(j+1), + // type 1 Alignments align x(i+1) with a gap + // type 2 Alignments align y(i+1) with a gap + // All three of these types must separately be considered to select the + // optimum alignment in the next iteration. + // The alignment algorithm proceeds analogous to the well known + // Smith-Waterman algorithm for global alignments. + + galn->dH = (float *) calloc((ALength+1)*(BLength+1)*3, sizeof (float)); + galn->dS = (float *) calloc((ALength+1)*(BLength+1)*3, sizeof (float)); +// s1lmax=ALength; +// s2lmax=BLength; + + if ((galn->dH==NULL)||(galn->dS==NULL)) + { + printf("Fatal Error: Out of memory!\n"); + exit(-1); + } + + galn->GNNParams = myNNParams; + galn->seq1len = ALength; + galn->maxseq1len = galn->seq1len; + galn->seq2len = BLength; + + memset(galn->dH,0,(ALength+1)*(BLength+1)*3*sizeof(float)); + memset(galn->dS,0,(ALength+1)*(BLength+1)*3*sizeof(float)); + + // set dH / dS entries all to zero! + #ifdef _pack + // this is FAST, but works only if #pragma pack(1) has been set... + #else + // the following is slower, but works also when padding bytes have + // been inserted in the array for alignment by the compiler... + // time is not really critical here, as this initialization is done + // only once... +/* int a, b; + for ( a=0;a<=ALength;a++) + for ( b=0;b<=BLength;b++) + { + nndH(a,b,0)=0.0f; + nndH(a,b,1)=0.0f; + nndH(a,b,2)=0.0f; + nndS(a,b,0)=0.0f; + nndS(a,b,1)=0.0f; + nndS(a,b,2)=0.0f; + }*/ + #endif + galn_InitBorder(galn); // also need to do only once... +// forbidden_entropy = (-(GNNParams->R)*(float)log((GNNParams->Ct)/4.0f)); + return; +} + +//------------------------ Destructor ----------------------------------------- +// Free memory + +void galn_finiGAlign(GGAlign galn) +{ + free (galn->dH); + free (galn->dS); + return; +} + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void galn_InitStrings(char* s1, +// char* s2, +// int s1len, +// int s2len); +// +// PURPOSE: Initialize strings for alignment table! +// +// PARAMETERS: +// s1: Target sequence +// s2: Probe sequence +// s1len: Target sequence length +// s2len: Probe sequence length +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 01jan03 LK : created +// $ 01feb07 LK : removed maximum local stuff +// #$ + +void galn_InitStrings(GGAlign galn, char* s1,char* s2,int s1len ,int s2len,int minxpos) +{ + galn->seq1=s1; galn->seq1len=s1len; + galn->seq2=s2; galn->seq2len=s2len; + nparam_UpdateParams(galn->GNNParams, s1,s2); +/* lk01feb07: removed maxloc-stuff + // if maximum local is not in reused subtable, need to reset! + if ((maxlocj>=minxpos)||(maxloci!=seq1len)) + { + maxloci=0; maxlocj=0; maxlocg=0; + } +*/ +// InitBorder(); //lk01feb07: removed, as it suffices if this is done once in constructor + return; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void galn_InitBorder(); +// +// PURPOSE: Initialize the lower and left border of the dynamic +// programming table. +// +// PARAMETERS: +// none +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 00sep06 LK : created +// $ 00dec30 LK : complete revision - simply init table now, no +// more saving etc... +// $ 01feb07 LK : removed local alignment stuff and optimized +// #$ + +void galn_InitBorder(GGAlign galn) +{ + // Initialization for new alignment! + galn->maxlocg = -99999; // maximum melting temperature found (kelvin) + galn->maxloci = 0; + galn->maxlocj = 0; + galn->maxloct = 0; + int i, j; + + for ( i=0; i<=galn->seq1len; i++) + { + nndH(i,0,0) = 0.0f; nndH(i,0,1) = 0.0f; nndH(i,0,2) = 0.0f; + nndS(i,0,0) = nparam_GetInitialEntropy(galn->GNNParams); + nndS(i,0,1) = nparam_GetInitialEntropy(galn->GNNParams); + nndS(i,0,2) = nparam_GetInitialEntropy(galn->GNNParams); + } + for ( j=1; j<=galn->seq2len; j++) { + nndH(0,j,0) = 0.0f; nndH(0,j,1) = 0.0f; nndH(0,j,2) = 0.0f; + nndS(0,j,0) = nparam_GetInitialEntropy(galn->GNNParams); + nndS(0,j,1) = nparam_GetInitialEntropy(galn->GNNParams); + nndS(0,j,2) = nparam_GetInitialEntropy(galn->GNNParams); + } + + return; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: galn_CalculateTable(); +// +// PURPOSE: Compute the entire dynamic programming table cell by cell. +// Requieres that InitBorder has been called. +// Computations will be done in the usual rowwise manner. +// +// PARAMETERS: +// int startRow: Row that computation shall commence at! +// +// RETURN VALUE: +// void - Data will be written directly to dH and dS in this object +// +// REVISION HISTORY +// $ 00sep06 : created LK +// 00dec31 : modified to begin at a specified row! +// #$ + +void galn_CalculateTable(GGAlign galn, int startRow) +{ +// #define forbidden_enthalpy 1000000000000000000.0f + register int i; + register int iminusone; + register int j; + register int jminusone; + // Determine previous character! + register char prevchari; + register char prevcharj; + register char seq1char; + register char seq2char; + register float entropy1; + register float entropy2; + register float entropy3; + register float enthalpy1; + register float enthalpy2; + register float enthalpy3; + register float g1; + register float g2; + register float g3; + float maxg; +// ofstream dboutstream("debug.out"); +// assert(startRow>0); + for (j=startRow; j<=galn->seq2len; j++) + { + if (j>1) + prevcharj = galn->seq2[j-2]; + else + prevcharj = 0; + jminusone = j - 1; + seq2char = galn->seq2[jminusone]; + for (i=1; i<=galn->seq1len; i++) + { + // local variables + if (i>1) + prevchari = galn->seq1[i-2]; + else + prevchari = 0; + iminusone = i - 1; + seq1char=galn->seq1[iminusone]; + + // --------------- Upper cell: Alignment of seq1[i] with seg2[j] + entropy1 = nndS(iminusone,jminusone,0); + enthalpy1 = nndH(iminusone,jminusone,0) ; + entropy2 = nndS(iminusone,jminusone,1); + enthalpy2 = nndH(iminusone,jminusone,1); + entropy3 = nndS(iminusone,jminusone,2); + enthalpy3 = nndH(iminusone,jminusone,2); + if (enthalpy1GNNParams, prevchari,seq1char, prevcharj, seq2char); + enthalpy1 +=nparam_GetEnthalpy(galn->GNNParams, prevchari, seq1char, prevcharj, seq2char); + + } + if (enthalpy2GNNParams, prevchari,seq1char, 0, seq2char); // 0 is gap! + entropy2 += nparam_GetEntropy(galn->GNNParams, prevchari,seq1char, 0, seq2char); + } + if(enthalpy3GNNParams, 0, seq1char,prevcharj, seq2char); + entropy3 +=nparam_GetEntropy(galn->GNNParams, 0, seq1char, prevcharj, seq2char); + + } + // choose optimum combination + g1 = nparam_CalcG(galn->GNNParams, entropy1, enthalpy1); + g2 = nparam_CalcG(galn->GNNParams, entropy2, enthalpy2); + g3 = nparam_CalcG(galn->GNNParams, entropy3, enthalpy3); + if ((g1>=g2)&&(g1>=g3)) + { + nndS(i,j,0) = entropy1; + nndH(i,j,0) = enthalpy1; + maxg=g1; + } + else if (g2>=g3) + { + nndS(i,j,0) = entropy2; + nndH(i,j,0) = enthalpy2; + maxg=g2; + } + else + { + nndS(i,j,0) = entropy3; + nndH(i,j,0) = enthalpy3; + maxg=g3; + } + // set to zero if alignment so far is mismatches only (tm will be zero -> no need to reset maxg)! + if(nndH(i,j,0)==0) + nndS(i,j,0)=nparam_GetInitialEntropy(galn->GNNParams); + // check if local maximum found! + if (((i==galn->seq1len)||(j==galn->seq2len))&&(maxg>=galn->maxlocg)) + { + galn->maxlocg=maxg; + galn->maxloci=i; + galn->maxlocj=j; + galn->maxloct=0; + } + // set to zero if alignment so far is mismatches only! + if (nndH(i,j,0)==0) + nndS(i,j,0)=nparam_GetInitialEntropy(galn->GNNParams); + + // --------------- Middle cell: Alignment of seq1[i] with '-' + // following changes lk 01jan08 + // we do not allow $- in the beginning, therefore set to forbidden if + // j=1 + if (j==1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + // also, disallow -$ in the end, therefore forbid if j=seq2len-1 + else if (j==galn->seq2len-1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + else + { + // end lk01jan08 + // -- entropy + entropy1 = nndS(iminusone,j,0); + entropy2 = nndS(iminusone,j,1); + enthalpy1 = nndH(iminusone,j,0); + enthalpy2 = nndH(iminusone,j,1); + if (enthalpy1GNNParams, prevchari, seq1char, seq2char, 0); + enthalpy1 +=nparam_GetEnthalpy(galn->GNNParams, prevchari,seq1char, seq2char, 0); + } + if (enthalpy2GNNParams, prevchari, seq1char, 0, 0); + enthalpy2 +=nparam_GetEnthalpy(galn->GNNParams, prevchari, seq1char, 0, 0); + } + } + // -- choose optimum combination + g1 = nparam_CalcG(galn->GNNParams, entropy1, enthalpy1); + g2 = nparam_CalcG(galn->GNNParams, entropy2, enthalpy2); + if (g1>=g2) + { + nndS(i,j,1)=entropy1; + nndH(i,j,1)=enthalpy1; + maxg=g1; + } + else + { + nndS(i,j,1)=entropy2; + nndH(i,j,1)=enthalpy2; + maxg=g2; + } + // check if local maximum found! + if (((i==galn->seq1len)||(j==galn->seq2len))&&(maxg>=galn->maxlocg)) + { + galn->maxlocg=maxg; + galn->maxloci=i; + galn->maxlocj=j; + galn->maxloct=1; + } + // set to zero if alignment so far is mismatches only! + if (nndH(i,j,1)==0) + nndS(i,j,1)=nparam_GetInitialEntropy(galn->GNNParams); + + // --------------- Lower cell: Alignment of '-' with seq2[j] + // following changes lk 01jan08 + // we do not allow $- in the beginning, therefore set to forbidden if + // i=1 + if (i==1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + // also, disallow -$ in the end, therefore forbid if i=seq1len-1 + else if (i==galn->seq1len-1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + else + { + // end lk01jan08 + entropy1 = nndS(i,jminusone,0); + entropy2 = nndS(i,jminusone,2); + enthalpy1 = nndH(i,jminusone,0); + enthalpy2 = nndH(i,jminusone,2); + if (enthalpy1GNNParams, seq1char, 0, prevcharj, seq2char); + enthalpy1 +=nparam_GetEnthalpy(galn->GNNParams, seq1char, 0, prevcharj, seq2char); + } + if(enthalpy2GNNParams, 0,0, prevcharj, seq2char); + enthalpy2 +=nparam_GetEnthalpy(galn->GNNParams, 0, 0, prevcharj, seq2char); + } + } + // -- choose optimum combination + g1 = nparam_CalcG(galn->GNNParams, entropy1, enthalpy1); + g2 = nparam_CalcG(galn->GNNParams, entropy2, enthalpy2); + if (g1>=g2) + { + nndS(i,j,2)=entropy1; + nndH(i,j,2)=enthalpy1; + maxg=g1; + } + else + { + nndS(i,j,2)=entropy2; + nndH(i,j,2)=enthalpy2; + maxg=g2; + } + // check if local maximum found! + if (((i==galn->seq1len)||(j==galn->seq2len))&&(maxg>=galn->maxlocg)) + { + galn->maxlocg=maxg; + galn->maxloci=i; + galn->maxlocj=j; + galn->maxloct=2; + } + // set to zero if alignment so far is mismatches only! + if (nndH(i,j,2)==0) + nndS(i,j,2)=nparam_GetInitialEntropy(galn->GNNParams); + + + /* + dboutstream<<"After cell ("<seq1len;i++) + { + for( j=0;j<=galn->seq2len;j++) + { + printf ("%f ", nndH(i,j,level)); + } + printf ("\n"); + } + printf ("\n"); +} + +void galn_printEntropyTable(GGAlign galn, int level) +{ + int i, j; + for( i=0;i<=galn->seq1len;i++) + { + for( j=0;j<=galn->seq2len;j++) + { + printf ("%f ", nndH(i,j,level)); + } + printf ("\n"); + } + printf ("\n"); +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetEntropy(int i, int j); +// +// PURPOSE: Return Entropy for given cell +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Entropy (dS) +// +// REVISION HISTORY +// $ 00sep23 : created LK +// #$ + +/* inline */ float galn_GetEntropy(GGAlign galn, int i, int j) +{ + float entropy; + + // first, need to determine optimum values for dG and dH + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(nndS(i,j,0),nndH(i,j,0)); + tm1 = nparam_CalcTM(nndS(i,j,1),nndH(i,j,1)); + tm2 = nparam_CalcTM(nndS(i,j,2),nndH(i,j,2)); + if ((tm0>=tm1)&&(tm0>=tm2)) + entropy = nndS(i,j,0); + else if ((tm1>=tm0)&&(tm1>=tm2)) + entropy = nndS(i,j,1); + else + entropy = nndS(i,j,2); + + return entropy; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetEnthalpy(int i, int j); +// +// PURPOSE: Return Enthalpy for given cell +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Enthalpy (dS) +// +// REVISION HISTORY +// $ 00sep23 : created LK +// #$ + +/* inline */ float galn_GetEnthalpy(GGAlign galn, int i, int j) +{ + float enthalpy; + + // first, need to determine optimum values for dG and dH + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(nndS(i,j,0),nndH(i,j,0)); + tm1 = nparam_CalcTM(nndS(i,j,1),nndH(i,j,1)); + tm2 = nparam_CalcTM(nndS(i,j,2),nndH(i,j,2)); + if ((tm0>=tm1)&&(tm0>=tm2)) + enthalpy = nndH(i,j,0); + else if ((tm1>=tm0)&&(tm1>=tm2)) + enthalpy = nndH(i,j,1); + else + enthalpy = nndH(i,j,2); + + return enthalpy; +} + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergyK(int i, int j, float t); +// +// PURPOSE: Return free Energy for given cell at given temperature +// +// PARAMETERS: +// i, j - Cell coordinates +// t - Temperature in Kelvin +// +// RETURN VALUE: +// float - free Energy value (dG) at given temperature (Kelvin) +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float galn_GetFreeEnergyK(GGAlign galn, int i, int j, float t) +{ + // dG = dH - T * dS + + float entropy; + float enthalpy; + + // first, need to determine optimum values for dG and dH + entropy = -galn_GetEntropy(galn, i,j); + enthalpy = -galn_GetEnthalpy(galn, i,j); + + return enthalpy - t * entropy; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergyC(int i, int j, int t); +// +// PURPOSE: Return free Energy for given cell at given temperature +// +// PARAMETERS: +// i, j - Cell coordinates +// t - Temperature in Celsius +// +// RETURN VALUE: +// float - free Energy value (dG) at melting temperature +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float galn_GetFreeEnergyC(GGAlign galn, int i, int j, float t) +{ + // dG = dH - T * dS + + float entropy; + float enthalpy; + + // first, need to determine optimum values for dG and dH + entropy = galn_GetEntropy(galn, i,j); + enthalpy = galn_GetEnthalpy(galn, i,j); + + return enthalpy - (t + 273.15f) * entropy; +} + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float galn_GetMeltingTempC(int i, int j) +// +// PURPOSE: Calculate TM for given cell. Return in Celsius. +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Melting Temperature TM +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float galn_GetMeltingTempC(GGAlign galn, int i, int j) +{ + return galn_GetMeltingTempK(galn, i,j) - 273.15f; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float galn_GetMeltingTempK(int i, int j) +// +// PURPOSE: Calculate TM for given cell. Return in Kelvin. +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Melting Temperature TM +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float galn_GetMeltingTempK(GGAlign galn, int i, int j) +{ + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(nndS(i,j,0),nndH(i,j,0)); + tm1 = nparam_CalcTM(nndS(i,j,1),nndH(i,j,1)); + tm2 = nparam_CalcTM(nndS(i,j,2),nndH(i,j,2)); + float maxg; + if ((tm0>=tm1)&&(tm0>=tm2)) + maxg=tm0; + else if ((tm1>=tm0)&&(tm1>=tm2)) + maxg=tm1; + else + maxg=tm2; + if (maxg<0) + maxg=0; + return maxg; +} + + diff --git a/src/libthermo/galign.h b/src/libthermo/galign.h new file mode 100644 index 0000000..aad5541 --- /dev/null +++ b/src/libthermo/galign.h @@ -0,0 +1,97 @@ +/* + * galign.h + * PHunterLib + * + * Created by Tiayyba Riaz on 7/2/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//============================================================================= +// Module: galign.h +// Project: Cubic Project - Calculation of melting temperature and free +// energy of two DNA strands +// Type: header file - Thermodynamic Alignment. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class GAlign - Thermodynamic Alignment Algorithm +// Author: leber +// Date: 01/2002 - 02/2002 +// Copyright: (c) L. Kaderali & M. Leber, 01/2002 - 02/2002 +// +// Revision History +// $ 00sep04 LK : created +// $ 00dec30 LK : changed to do local alignment of probe against +// one entire sequence +// $ 01feb07 LK : optimized +// #$ +//============================================================================= + +#if !defined(AFX_GAlign_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_) +#define AFX_GAlign_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +//#include +//#include +//#include +#include "nnparams.h" // Nearest Neighbor Parameters +//#include "../gsfxtree/gsfxtree.h" // Suffix Tree Stuff + +//using namespace std; + +#ifdef _pack +#pragma pack(1) +#endif + +//----------------------------------------------------------------------------- +// class GAlign +//#pragma GCC visibility push(hidden) +//typedef class GAlign* GGAlign; + +typedef struct GAlign_st +{ +//public: +// lk01feb07: removed maxlocstuff as not requiered by thermtreealign... + float maxlocg; // maximum local dG value found + int maxloci; // i position thereof + int maxlocj; // j position thereof + int maxloct; // and type of maximum alignment! + int targetNumber; // id number of target sequence +// PGSfxLeaf probeNode; // identifier for probe sequence +//private: + char *seq1; // Sequence 1 + char *seq2; // Sequence 2 + int seq1len; // Length of Sequence 1 + int seq2len; // Length of Sequence 2 + int maxseq1len; // length of longest target... + float *dH; // Dynamic Programming Table for Entropy + float *dS; // Dynamic Programming Table for Enthalpy + PNNParams GNNParams; // Nearest Neighbor parameters +// float forbidden_entropy; +}GAlign, * GGAlign; + +void galn_initGAlign(GGAlign galn, int, int, PNNParams); +void galn_finiGAlign(GGAlign galn); +//void galn_InitStrings(GGAlign galn, char*,char*,int,int,int=0); +void galn_InitStrings(GGAlign galn, char*,char*,int,int,int); +void galn_InitBorder(GGAlign galn); +//void galn_CalculateTable(GGAlign galn, int=1); +void galn_CalculateTable(GGAlign galn, int); +float galn_GetEntropy(GGAlign galn, int, int); +float galn_GetEnthalpy(GGAlign galn, int, int); +float galn_GetFreeEnergyK(GGAlign galn, int, int, float); +float galn_GetFreeEnergyC(GGAlign galn, int, int, float); +float galn_GetMeltingTempC(GGAlign galn, int, int); +float galn_GetMeltingTempK(GGAlign galn, int, int); + +void galn_printEnthalpyTable(GGAlign galn, int level); +void galn_printEntropyTable(GGAlign galn, int level); + +//#pragma GCC visibility pop +#endif //!defined(AFX_GAlign_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_) + diff --git a/src/libthermo/libthermo.P b/src/libthermo/libthermo.P new file mode 100644 index 0000000..f0802d5 --- /dev/null +++ b/src/libthermo/libthermo.P @@ -0,0 +1,15 @@ +libthermo.o libthermo.P : libthermo.c /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h libthermo.h \ + nnparams.h /usr/include/math.h /usr/include/architecture/i386/math.h \ + /usr/include/string.h thermalign.h galign.h dinkelbach.h diff --git a/src/libthermo/libthermo.a b/src/libthermo/libthermo.a new file mode 100644 index 0000000000000000000000000000000000000000..00900a23c85af0ed54c94872c3961baae6edd17e GIT binary patch literal 37528 zcmeHw4SZD9nfDEuK-3{OwBX_{=%CRCB}fD`Xw`N~j=Zc2x%O+etY2xLRu9)a@ zO&pgEf+S~;8+ZAH2`*Rm1pa53b!J)F*qdh-dAyTdH{WRW{L%|vaS1b5a1-B{qf`Z)&zNE@~Tfz4f z`~dWhg_?FovV<#R2hzoL4!f8N3cTG^ems}_{y&8{l1shaJtD4u;sRfR(4SNOdP{BvhlExI=b zQ@zlRm|R^^;ayNsedoP-(Nt074N+uKMU{Wvf;)>V<}b1(wPCHa1rYhVh1GK^s#Rtp zZuVCr^J0amM78pbs(E)>HDEPk5Q~sJBEP*_7J=1J_}O#jcotSw&91%~3{gb6r4E~L zR9H`ULm423;g+cc?%c# zE134q3V+_h`HQM5=8I@`#r%bLSHuXrcy?7$g@5kCIa(Pmk!8W6+10b>ml;)P@8 zoNb&R6T7*hsxmHtJ-RSf62&&gDq?WbE}|&5+Pv9Su%Q?V(NifQYO4}YTQmo@7l(vm z?yjizPpO$7Nf)OKm3!XeqIrww&-T;0{c|g-=O>u6#KuT&M@jJ_n5gku{E#z>gzff` z6t)||+N{Q&*oKazrZOy3m-&f_*t$f~*z<~6GiC7-4Xode?r(g*d{RH-d(NcGtnocM zLX5?zB*U*xzI$qrrX^u0(J*XjgvWT{!Vlpb{8lpFPY7X*V@Oxfii4-dpMyk%vNC_g z62H8b@nY|c;%MBu8foLlya(`aD2Wa&KwyQkvZ{F;->N1^Oj%hhKbPX;R2jyBk3tCy z>uC=WemN2w&u^T{oT@Up!OzYE(U?$HRx!7%5*?sIqT~7HD1IwdE}!DJ2=gV&I7`#! z+8HwWaQvDJ2$G~RdkFp?{&_BSJ3e$Y%6+Ux%s?U6@bB}(WhWJ~? zG;j91MzJ^h$jv_U6JPB+IUndlydA#AYmbum2YR}<1OEnhqi|BX`0p_P9l*b$LzVh@ z9!-<~eU0BwHG21ojJ)1Ofg>@pI@~xtwecQ@!0BUCRx742>nj3CD)K+iO!j1%5aueD z)oe*L4dtg7H3K*8fS%d*fnGvkZVgp}r4QNfuGFU=WN|O+V|VKh{2Le|Htmo;_JIDt zCKY>`ELo9hhkEjQdwZb|{lV|y!dw5Ye$8AHpU6rEJOl7U6q5=_+XOl5ARr3N>I7uEO$t(0WEplpAjYk% zy8t0m*7blWaM%&>Cu{>`zNZ1n{|A7~_aGq4xf_t>%mrjQtPkmz0x}=_Dr7As1iv({ zfe}gwfp@w9k-n4=MW>AbL?|Hy-uVh(8WLLL2hwOd|EchEKppg{NX&3Hex#?1n5OM! z9Q1&q=Ob>*t~JEP*T=<6;^O}h7r#6%eoFK(OXzD-PTKO7rqds)Lgs%txE`x0+VO0utus^2K5iPZ~n} zyl3%o396?3kp^lD$}A(8$h_RoJpZARi*q7_<8MSikrDjoU||oRH!q8uRb-f|GNywc zFFViyDFXU1yLKc{UaETTxtewfuSa1_1QO{Y^D^`E2bF0f@k_(+%LdNFYW7X_HQt|d zlngLvyD`C}-qzW}sC}MA`g?|(H)dikcX`bGG02W=KAF&(*KaYV6_}qH#Wy!j&kPR= z+_)zk&S&Fl;e3o`1VXL#2k&(mjovZ!A$|20pv7cnlyZIGu}bx3AD6}J=LMLxp!|)3 zMsJSM+wKdFxUDdl!JB#Qy0F7VNYZs-k4qFL(uF+^QDi~meLhh)VBMG<9qVE-hCamO zaP)yo^-ikEylN&*3(cBihWVjkzFKJBcOoz7nUm}dE=wvj=O6W%w;$TYdSQj<#zN1~ zH-~*@QAc6<(L(btqERH-%*(M5zS{5SXg)K~?O))l5BcZ$8oeDJvph5VIR1}=ePZ#m z7{-_9D<~IQh9w9s7nFw!_016HO~II!jO*;W6*k^? z!fxfh;MHtKk-Qa^?G;zi_7&^a=|;rDQKuzW-7`jCJqshCVIDVXPbKLOO+hzB1G);z z+x4x515O#i$-PEnO{Q7D1T1~!0@NSZw>k#s51oUY`mT_^tJ7#@cMjM4k%(qxdLJ{3 zHihdKAx^1!xTB*wf(6EQ=qqMHyD(7N3~W2y8wh&b$xw5<5p)dlt)n%pP;@M@(~DLm z(48xYUuQH<1Cyd0ECPd`n*<9_(h1)>eXL`-Ubg_nGz#I2tGtzCKNe9~*J3H#tTzlo1k@s?_I5-K9%BNT zJV_d}ds{?nGx{XCtoTVpNU7KkOHxroF$yE6glev77`` zUiB~%SZaPK6AN_oQ7RohYPi1DGvL(9`elg9ECr1`2 zs4Y9|En80WCA~<43qVWG6~3ewsPR>ZOr`~U>u_~FuZo(8V|#d4;wt3^+}*&Eneuwk zN^A5?*f_gAZqS<8LJfOF#JfgsI0rVt)AyGrT7!J}!0p7Y8{08FJ;GcJ8 zr2WwGi#F)BYd`7jC3S;n&h=4!yP3SsYOX#;8w?1{*p!gOi2+HDHG*;CXjA_~6skYC z3jdYl!C6qP5X~|si>uY!94n$`Vq-5d>h<3Rt0n_A()1NK;##Kt05gA)<~3_lIe3M= zVr62OUPm7yji*R*9b(P*`h({x3Hy_bqn{ds_4-dvTFVE;$JdC~68&m$1ttwPK1QM; zVyI%wi?I&lUr)X$+HLPgMLXl`kA7nBM_c-!M-`;kKmTcOFZAJsK6c{LGVWo+00TBbd?2G^0np^ug_&;x}&kW==_#Qqpu1SVn?h=rc$ zWuwqm0YMSbel4);DH!ZtjC_*T; z3dNEG4~iOWrqZ@&cvo?}B54%|tp{$fcc=jC8&_U-r&VHXaZN;Lcd|G^Aj_tRFc?VX zXqqwv<-)?(=nwq6P>{u6WIrCMNhIf*m}+DnC!;kHDh6YW$UPecE2$q&5ba`a(0bnZ zII5uRgHVxLcCz9-MTrMhiITB3PtNdi+GLXs5m#u=pTOFM{VRRMTb= z23d4PsDdN@rKg;Mxw~f~nZ~xKH#;O)cgCu>g-Fn{C7xWZrO^y8S4te8hKMVSP5*Md zUwcvf8;fK!MI;l8Fc2*QxphRSf;s-Br+gPvanBXt&((BxXU{VqMg|%=4y*OJ`H^u8 zZWYyza)J5j%~+VHdK&XHz4f7c&;N6^WzVhHJkv0!7`6M7MS0$<{27_|-()l{%@~vY zdVTMb-^JW-8jYR|tWk?IaI+@v4A^IIC)c~jlXO(gJGVMkv~WZa{EVwgGR7?H`12#Y zffl7jM=WtS9hpD^$RhW_ryKprKOOQP{37yu8w)AFx4v`9;h6i={~Pj8^90A-Z4?{k z%W5lOcw;_p(ry$Bdwuh6U*q%~sEZ>V);;*=SgzW7i1$!q9+KXi<7>RZx)0cE1U)Gl zZUfr72Ox3b00;hiQsve?E#>eR$b)qf@}#PA(u=SIUv#)S1uK}vcY#!N81X}k$4O42 zWDo8eapQU+N@m^_sc6CBqVcf7ZrKm=SO4-fnH;uE9FO}=>R4e*8FzpAKVF|X>?L1T zpM7TPTc3R-J&iuG0fC){ieu(W*p4`XT?&j1nC#Em4{^0oYRrw#n#4va~n<)eQpH%%-_)3v7$tO`amyh<3lSkzloZPt~gAj$n3a{>+onI znxuNB-L6F(l!KDOQA$xE3Lr{#%Im`#kC$vSEm($88p$ zN0DpL@ldh@Q=wKnaFtjOY91!V&noY2T#Jy9KNaYQ9IFP3=V7ap9o#Lh9Tb#ov|PmO zd}?tMB0>Tm79UJ`$N?6dLu&`EBBl^Od;VV_E*KcyadZv7M?#D{mp8I{%`Ab+yh6MevomM$&QyhGh8D!%##Ft+w(O)_Lpf6ef zc)R{~f8{Iuesn0vK5QSLj^~oo051R>sbCKnFuoj+>HL5&iL5&SVE|b}ReXSoXQS6J z{bxfY>{Rd%*ytnvUlqJp!Lfj&fgcQ*3HUCAK$S;64@mq11&aVl&rf68m#BD}ivJ0%#B{F!GTk;nrdy`svsC;-1qUho@3FtYeER{JelsBR->L9r zD(+VFe?fm_`kw)wr)gK;1$YtC--bR&*m#Dd*8-A$10eBF!<-1u20R~-&Nzf`EBH7d z`Q9>6;;&I~7$EWKXcVSDfzD0%D?sAU$3Hxh${G%MG2piW5k?LKMAKyT4A8VufUhg~ zBS4IWSt}Hz&m`sQMxzk!Q1IIda-W;{cNHuE{0`zSK>V^$hJcwU4^2@@2-T)#0D^T1 zA@I(lh``-(<;GH4p3C|Bp3Gt{gZ6_ehCj{OZ07Ta+C4^4XmI9Ja z2)uJDAiAvZCqP#e{sg2KO1#qrh)1lYgz&=^{sg3tka#EUAPqn3x$L_t{ueN1c#m-i zYCRWSi%a?wj6-D@bkONfARG+<=o?IW6Ab{NTG3so9K+>`?ndJ>lq!0+dJcS_MMq^A zHY)lim4B|HccbzQ4T?TX(VtZGkdps^RX!imBK%O%n-zch^iuwAmH%3#q5RY z92Y-3j^E(8cw=1pK3sz<;hfx;4{@J24BJ}6~dj($l=AY2G|42!7_AQ6qsRb<5<$8=6_YKp|w7Z1EoxBQHbG8;C3 z>qBPyvI~9Y@3T9b^sR$(^Yt~mbMy2yp)+;|P9_<(hkAS4e7)gGFV_44I{>L3Gjtnw z4Fb$Q%^m19T8eQ6#{LTs%Wux}{~cgnbH4xEfW`h){GZZ1#h=o`1{9lk9?P#NK_0W6 zeg}@^`s3}GL)+a+8OzSg?u_g+n8!wUK$>-v-qW`hC-<%#nq2cvdvXTlf>4!B3H*YK zPMP8QNF{h=177%{Ba^bMtrP0qb|==IzMm#oy!>`_IUovSSJ( zXYOD_iG3k=&Tm?9t?Ay5-uC)VLw|Vhqz_Pnr>?u^X9eXUZ+*yPwij@tIMQC{_t{=3 zyf~%TSXrE+)$|zcUM-{0{76)QOh8CLtp53W$jkh-;yC>z8d`w}bJ{w}7&gg%M z>3fZK&p?$P`*fl--}1d35GeXgV^5rHg~7*NkV@!=B90IuCTJl>SASBPOh^SqL0wT@ z$%MY(?IB;|v(zwPl1RKT2;L>(N?i4HUvEY*AsIxoNSzzuHTzoMEpZP|Cxz}=JD>RQ6nzHa+n=&=6 zl>@2B)i@DX7`^uR%rLYo*6Dy4e}y}M7{?`h^S6cO+fjzLU22Op^3mNN;x|@cvl_BD z)|}|v15PY%e^;S+boFrSEfXt680Q8Wi+zv#(e((Ern=Q_&FAie@_t+d-e^icdrnu_ zr(`^FA9R99jDOMexP6Z9gN`8yKWiV8%EixKuU8NkA%RaS(5q5v2ReA?^-yw*&wf~?B`v4k0UF(9V)k>>+qULH9V5Vy!9(*QA7XZ3;|=KZV>6?_|z_(Oo)m)Wo4 ztt$RQKNw)&jZB1M3z&*%M zC*TEus{om=RKY2L=OLaA$bBX5-;4s}JFVvfa$kw**$#w!_ca5sTg6{dut~vR!3>#h zpMqQmfyJ;o1@BQXOVQtju9^PF3bOk#J_B@Ap_CB$(~1ERN(g~>rURnoN(s>}X--8a z1m4+=#M~buL{p?421FWe9;r z`X=OI*r4dg6nzPBq`UBAC|2|(ivB2YBEO>FtLRTE`Y6(YtAnx_xW;5WpyFd0S9wVv zsPeZc`ea3)N+QBJiq7}M87lB&{%%GPu2FQKDxa)bp0&P*jzoN^%KtkguobWQQIn@pt3mZ^gwq4gwVk$K&F(OFR9_xcHH{^uLaaUl5l*e!oY& zKOFlial*^VF)uF1z8{QtJE|)9I(F9&l->B~Ug;@1Q zu5JIodP$e4*w=1j_X(_YxZStO6nUAqpBHjtQ(NyBMRz6Cen^ZweY{DO_{vis+bD?} zCZ~PxToA<$6`HA>@9(P(C<Yr|vm*1AnFiiJw zMDhjJyv~8^hNEZ&(o92;#_-x#8y;>hC#5mj2-hCLib*CJ)0ph5Z5wWc^Oh1(P`k%r zm@7yJs|Q|ItPJzIrH#o2wYweR$s(Pvw%uW_AesD@pRo8bnGeYzs3^C9yv+dVNNBpp z6x4=>8@*k6eKzhgjc|PcHp_y^W^xX%46{Cjg)|ZMYZTQmA6NoakVXB=AmZ(&mT+Et^?A7EHNyEWnONM=8sP@sG)Qq9*h(zFVIA@+oLdwQ+}ubN z8V;!{G(3qaWcM28iS0BDZ_t+%^;Zfy()fs1t38pT*Du6MH{$+S2P~xAU3)U6=HuSB z+K*Fe4q~^@SKI5Td8W6$_EbvE&)r*-u#qHycI@U_HhPm=Y|>f$>XQpAxLf$(Ry+EN2eN zzCO4irx^kv$AIgDbveg?D>7e0=eUxcL-dv+^FYz)-xZC1xp4IBvWpasJ|NvWmEPdG zniqCl!m0wEUs?HL2roZ!l;TT}sQ8N^8hv>3aH7K34VM^LdIJqbAeZw>;H8R`nV=I4 zhj|V6Ee|sR^e0S<4Po{|8TV~L6aEp_)9Y=ny{{ROFd{8l=!1R6cFmpKzDv`dY%|P; zbr=|posX`6mXRvlrM5h}{^_>X%F3U&87*4i-F;qjwUcRfj356D(ri0~J5J*{P5Z;X z=kQ^UcCYzFD(+da#5}og`}px2ndB*qYTz~%BiAm?r6JAgEm)%im2m-V&U~U7z}Wui z`hPTxohZYLGUpjyvmt=yH_X-$^x-wv7P2JB`H07BZp^^i-!M0JU@kSD#}pAV%(ca- z#I0U~@=8UW6eH&oQz=}yVGd@L@#BBiX6(>Hx)+FUNTF@IupO5! zRQ%EPueE{kV!WTyuz_vuI)|jgP{#Q2d#F^ffkv88Nod5wN}3H5z_n$6SCtzgwg}2v z2eoc)dr~A=`=N-n@{z+1sLX>r+$%tWRRGk|UZ9TQ8XT5Fjuw;_LKUW>awu*DRsI}y zm##tkYnOV^!tEg_d5KW@#t|q5)jou&5;_b~%jW6~q2IO507#h&<3KadFhEHay59IQ zDg{&c9vY~DlQy(;n7TySCc1}+P!R*Cfq-3ja((R+~1Y@G|0p~^YZ*qT}8{asJFAzhV}`zbI(fpw4@$@e0` zE-i2snsO~4pdf2E__prvdWU&hgn_O-;i9Q%XYYl%e>;Svp9ZKyKb|+#Gl$%CS+X<>I@ErBJ2o>ig zT3`ZOz-$OfFoL=QsTEEAvOujZ6v|xd5F)QWfn6$;ngjhHSD^%Z0iabrI-)PyoK zs5EHmD&h_c-0EUf5E|0jz+eq~nYpcsovB50kJyKHc#3u1zQ4=CL8kIrSnj2$^=9rJ z02~%gx7sNhXmuR`q@M%MXrc`$4P)07^z4nBkQ~gqDgATk+Nfm17+4ncSB0uTwoFtK zW%J!q=wgkqF=1{_BvY2R7tN+F)4^O_9;FIh0SLQxi(t~g9U8>e0ibFfyP=d%agAZ? zHRy2lIAJ^6W(cr$B^0o0f7b};q@hD7VD(Ta0K?sD*Mi9sF|stYKn>tEo0eRK)@`Y* zd>{mkOu^W&(JkuzL;xB`y^eq{>Xk7HLcri^kz#GPu%C?%8p(F-@#<9&$<3LYc1l;N^!hE(lpu6X^?!Bz%RH4p>CaQDi{;ri^x9u=-;f5~A zq6N%-7>Ql$_tC6S^UBJL*hhC#X{3c4iXo7~ZbA)({=yA8C|B$LA&1GmymU`QX{$$| zg;3~DXn%WUWtu^Cih*aF2Q{Wv(8R5)pgZVsRj5_lw95K0M-CtB72V2Tqn)y5KGzLbWv6tZ4zR}#4~#Wr?4y0U zQOE`;0{TCUlA)F(qO7$Ul6$7$*g6*+QHw2-|A#U*M%csZDwk;8fz&BI1)&SE2B0o5 z%%Nzdp;Td`BEeLepzOsR(aU0XXav+G;1Y@oajxo;_Td!vp@lLr!|(4JgL-ZA3tMa5 zB8;VBoa`Tqp#4_Oy;H=E&DlFUtl^POj}z-(W_5@Y3um$AB1531rTYik9@-on^F>V1d~H%>J&f=TURN#B2cu(->rKD8VK?KpiX?YH+a{8@@x- zw#Niz^$`(k{ZQKVUi7aWn)^TN>mGlK}W>8fkkg)+Tp|oehQu&XHblA1Ko6$No@f}&8Mq~#i+)z zWx6c`fst0&!b=W$m_SeoxD$}|PLZINtUm=7+6n;!g1Dnb!{TxsyR~e%L#Q`qO*gQ7 zvY|vMM%9G~=&hpfHsqcwI>a^9V>1#$j}yojlP!#ES7qf`D&37#08?RTJG5CFX@IkU zZ(S;syxK1W+RW!E=vS!#n8~|18t)Zz*yeO$s2e?lg~X3hAX)n(vMq8&Qq8x4eOhY%FtAGVONo_@x)WFo?scM*u3ia%rL2i*V{IX;W;U!6l(k31*wc{V zqP{l$hWgrsA!qYcsQ@2<7^=-}O_J3NIdrY#V1*V#O!bapU{vpTjA_CRM}SeiW0C0b z+nP}fG&ezxXMV&)UkNvS3XJ9!Hj~S8Fz^#2jou+KtD>E~3&Vl3nI?f1%03{lP&O9# zz@pV(1_o7LCt?kUWUK{)2o!LFqffY@L<}inir)YXR&VKi8eh_G+{W^*v`*ZWid$?0_vwBe_neR&HXF}$Q5xmRXE*ra$8MI_uW-yk-Pc)vV{H}V_A-jKRqt`qmmQ-Z4! z?w9er`+Dv!S@+9$)?PIFt%A{qibnsT!2D%_dC)idP+pi@CGLwb@;HW{S6|*%P`lTG z>q*3g9RrE-@gCV2E-=@LM7Zm1qGH0eyz7vhZI6}}r{%BvLr`Sr*B7bBhFzYG8K{8jj$@1KGH z#r`|-KcnUf@eED9j|Gf$tMdN`F6m44HXwbg-T*9aHvF#wW;AE`U+iqT7=>64S!#cx zvtXCS&?7x>r^E-L=Z10UzLirixYALok?6Zy{%d|XUN0nI-=@UhT-u2rtpen#Mg zVTc_+{{&p;89M}zOkC$1JA?=$a9wPOLB*erYbnG~aDu)Keb-Ack1$}Tulyi<^2$3o zYGL%k7p|Oe1l#pK;szd~ch~jG{Exsc9rriIN0WHgy4ce(exVD~Pf~nrKz!t`Kt8?66g?fA zw(RTIV&_v}SLuuWS-Bb8FNWdNOaP~$L$`BP3isKXOJ#o&194A1 zkRmR|25}|ZU*th@|EqzMDEnU_S>PlJ=d+N&Nr(U10H*!#pq2g4Vm#67RtY>xe@0vs z`_IKFVu+cb<^n^^B!B)pJAX0sN5_}k6nskLz_LtMC;%3@)(CNSZocu0WWSzUZ2SUW zP}u?+^^M~7hkk=05+dkz+>HPPYCOhez8a5lS**rmTnhWN+c$<2W#1EVsqCBcxw7x`aH;H@lcTclsJ?0GK8~e* zSUOw^3-}#Htoo$Ao3AO0_n5C@_?c1oFuw69K2l|U|9FPCOPVr=sb(0nuyTFH#Luw$ z89VqEm?qXai_gS2SGx3ue}w`tAZ6Otu41gP*$1rQ1-*|Zt1seXDls?qEWxFvPob)w z$tX_QuUKL9JkIhte+aGiToj=Th4-8ti5p^l(oI9k8LX&+Lq-fW-rtC}}Cc0DQz#aE*76nYAlyJFTdZkONX zL|ORO*fuhnex&|&_~+`GhnG!4BvPh$Q!n$dRcs&Sy(!8Qe(Lp5`Af_2*~cD!5+{A+ zIVfDJE0RAcLEe{Ow7qSUKBBxcaLY5}Wy<|Fgc2(|$bCz^+^`JE76oE4eg*znWe$5B z89|i&Ro|Le-fmlYC4;{w%W0#2CdZcJGf(<{eYEz?B;OeaR3pvk?71E0iT!VrZV1=n*NU<2T0U>(FY~SiFOsmoYp0t(;z$R|4Tzr>2))pI?X7dJ`u&Z;-w=<` zeeLLy)w};QbBOfoU9Y>}oBPL=&z?AP&Gx3Bj(%k8AFsi(NQC(BX+uGp13$|T_jlad zVTk&xVwFKZUkuEAkDt2wb2vI&5Bl`Cj-l>w?%H`HbA9ECZ~F0(^sJ@$t~fV&`c2O*4-umBNWCd^y!=-wye70L z_)Gotliq3;O?;?8q-T*4c_tuY*N>mg#SoQ;EvMRXRkCb6N5c2i$DkvM5Y2~eg=jhP z_A-HmGDOp>b|%5@C(ZQH4q~J3OmMb_A9hBv~i-u;o<=D1m{6+=0qOK4%>t_mb zVaIar1BA#~a{%!G<7x~R0zXaRCj*ktI{ZTsrGzLVZ4Dqo2_f*#sesUDDIr=atx(Yk zfp@w9(P2snVR&gH6rB)w@!mUj!o+*;(4Tnko&1l$o`839zQqPtDIq#)n((*)oe+5E zNEXlZi* z5lRSwce()4l%<4t&=BL74ZM?QM$s=jzyt_C!B6;2Ls*7ipdR$gJ}^qg{|$ijW<}qp z=yaZENM;evo~|1y^Qz2HW3xSLr~Boalr~;l=V8f_g38#x(AfsXotoih3anXJ0FK zj56$$7(LhtQB~NnQEk|8O-e4zjUqIIpPdKhxR~&He$CB_ z-!a7`1b%`C1Re)Yn3Fm6L;{m>Xbxaek^vaPq9Oz&7mZW#V_+I99p*UBQ(rppzSer~ z@!xcg1Z4{U?>Q3x|DGd3m-t`7CEh$PHf*J9{L6X4;=7aaMbL~S+Lylh0Ke>7SdMqG z<$KQ}Elz?+JUc?KOTko&Jrlj|Q`k;+FI?U0@jCAJeKxxU zXYFf?m;d?Zzjdy~CX^I8*J2rl4StFD3!Q5*6FI-&xfTT&!mJG+wOz&I7d+6a^#yV6 zcLmrS-~722H=!8Ycfjj5Auf*=cm!8{pKH-Y(Q0|F#Q@wUK}hRdiw{T;AKCsU&$YN3 z_ey*{q~WnMa3M})z_}JzAOgLpb1g)ZGKvp0%R?<9A8oeIvv@2b+&6fh#Xo|nRM*-l z`#w)%z;o%nNMe2IyzV2J-gchFTM-W5d#W|LvwM%{#SL5#XVRB z^>K~``y0Ot&z5Q6xiI33?F}<=#rEN`7tv42%g@KJBTk-Q!6Z1gVu*JGaS3NvJcCcn zGl@LA;w=m`_9m=5&4_|MVrKj*XIK0izia>ClladMsYAH0dv?Wqu!`9i$2kQ&$D#~) zIdpM5`iq`pkw@{Y#{zYn6Yy@?SS76(Ml2m!>cN-JW6rTSD_$PkIR!t!`Vm#`^BjxM zAERnZz}hOF`fstxAkIp_nL3KjuMQV|-l-0=DJswX;5iqih`$`E{F?E#HuB7E=V~JtH7G)bmxM6)pIb~zxi`8YQ&tAa1I8$ zQ=~%%M1PDur{U|KgYg-r0x_yS@bho(9E`Gl&cXP}H+K$3C7wr#_Wp*>!T3+iDLtRD zU1RmD&Z$u6U8r6+!~5mV!TA1v-#HjpQ|>|0b1-WAJO|@{{J8<*R_Du}8_;K_zRwNl zBdOzm&tVYbP4rxgJ`PvF{V2v*>oAPyxfXpS{VUXGe5?PpoLlf!&yBzd6mjQLNQYCo zK$QP^KmS$vAA)nj;9tQizT~+SYS=7ES&O`JewL?`N93~~!Ew4nIUw9jo?0F~ltWhj zbf-Q*9QsdKXFky0)i&zqe(x-Ui5VPxYGZpoFXD;cJHs}s4AJy>tQUQ5o{NYBKg-YJ z9qVCGG(ER-@qC`CKL6Z@ntti0!VC#=VCB%qxexLA z|6iZ0@MX_6h?l=uRKBnJrvmxSpUc4KfOb1jk5?1*ujGoCcecVOwpZWs&W6FGysR4m z(V|&b1LApm)?h#w+^`fC|1|_*d@mr={{oQd=cxEiD!va^O+SR7f-@ET2Rzg!{%Qr! zP>=_+68|nXvIw^;SO&=QiU8^Fa4Db*@XxSQ#$N+U3cpa{zoYQysW_cInC}vFlu>|H=ns^WZ!{5J1xP;Q0Y|{U;uGBDGF>hp z>6r?i1;}*CfK2yG+}ttUg$h21=3)A~6}&^i3jvw$RVbVE-3tB~ka&Jmfp~r&jj#-m z_^0s?{vKIR0A2w2Q$U1~{|69FlEv@XJe`s;#42_f*#m4IlU^NJy{4Kj6jr z3b+>&=PN+P;(P_tJ@^N_I9~xO6z40TNO8Ub=??q@-Wh@(@QAUL5YN}d`3j`(lz3+V z5M8B|5bYqo-$(jVi5K7RgG+((qU7?JD;Xd z_*u^@pA|ylxhI%0{1`v_J%pq^Xn(;q{T3`gku=8fI{@V|j6{CY$EoL?p8`jE0FA}K zb|BsI)A+v1?^5NhSM*uxx$YE2Z-NpSDy{sAe~zM$Q}iN5=krm9If`yg)b}g;IK_XI zRX&u3Y$pLrk>rh?_|aN;#NF_I76!J0?Dn?IV8pEMFw0?F;>VVj~~?Lpe4U zc65}5#UZXagl~r|(bg7mSgCm%GET)dN~}h0gs3hOkWtr@NY!oCk%tXLDA5}AOOWRl VL~_`8#ZjWXC{tlxw#}VI`!9O-I +#include "libthermo.h" +#include "nnparams.h" +#include "thermalign.h" +#include "galign.h" +#include "dinkelbach.h" + + +//#define EXPORT __attribute__((visibility("default"))) + +//char targetsFile [1000]=""; +//char nonTargetsFile [1000]=""; +//char primersFile [1000]=""; +//char primersOutputFile [1000]=""; +//int minLength = DEF_MIN_PRIMER_LENGTH; +//int maxLength = DEF_MAX_PRIMER_LENGTH; +//int productMinLength = DEF_MIN_PROD_LENGTH; +//int productMaxLength = DEF_MAX_PROD_LENGTH; +//char forwardPrimer [PRIMERS_MAX_SIZE] = DEF_FORWARD_PRIMER; +//char reversePrimer [PRIMERS_MAX_SIZE] = DEF_REVERSE_PRIMER; +//int beginPosForward = DEF_BEGINPOS_FORWARD; +//int endPosForward = DEF_ENDPOS_FORWARD; +//int beginPosReverse = DEF_BEGINPOS_REVERSE; +//int endPosReverse = DEF_ENDPOS_REVERSE; +//char maskTargets [PRIMERS_MAX_SIZE] = DEF_MASK_TARGETS; +//char maskNonTargets [PRIMERS_MAX_SIZE] = DEF_MASK_NONTARGETS; +//char degeneracy[PRIMERS_MAX_SIZE]=DEF_DEGENERACY; +//int nseqForCandidates = DEF_SEQ_FOR_CANDS; +//float minCoverageTargets = DEF_MIN_COVERAGE_TARGETS; +//float maxCoverageNonTargets = DEF_MAX_COVERAGE_NONTARGETS; +//float maxSelfScore = DEF_MAX_SELF_SCORE; +//float maxEndScore = DEF_MAX_END_SCORE; +//float minGCContent = DEF_MIN_GCCONTENT; +//float maxGCContent = DEF_MAX_GCCONTENT; +//int gcClamp = DEF_GCCLAMP; +//int maxPolyX = DEF_MAX_POLY_X; +float concPrimers = DEF_CONC_PRIMERS; +float concSequences = DEF_CONC_SEQUENCES; +float salt = DEF_SALT; +int salMethod = SALT_METHOD_SANTALUCIA; +//float minTempTargets=DEF_MIN_TEMP_TARGETS; +//float maxTempTargets=DEF_MAX_TEMP_TARGETS; +//float maxTempNonTargets=DEF_MAX_TEMP_NONTARGETS; +//float deltaTempNonTargets=DEF_DELTA_TEMP_NONTARGETS; +//float maxPairTempDiff=DEF_MAX_PAIR_TEMP_DIFF; +//char primersLabel [1000] = DEF_PRIMERS_LABEL; +//int fullStats = DEF_FULL_STATS; +//char seqRank [10000] = DEF_SEQ_RANK; + + +int countGCContent(char * seq ) { + int lseq = strlen(seq); + int k; + int count = 0; + for( k=0;k0;i--) { + seq[i]=seq[i-1]; + } + seq[0] = '$'; +} + +void removeDollarSigns(char * seq) { + int n = strlen(seq); + int i; + for( i=0;i +#include +#include +#include"nnparams.h" + +#ifdef _pack +#pragma pack(1) +#endif + + + +float forbidden_entropy; + +/////////////////////////////////////////////////////////////////////////////// +// Construction/Destruction +/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void CNNParams::InitParams() +// +// PURPOSE: Initialize nearest neighbor parameters. For now, simply set +// params as requiered. Extend to read from file sometime. +// +// PARAMETERS: +// none +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 00sep06 : created LK +// 00dec17 : modified to use new parameters LK +// 00dec29 : included dangling end data LK +// #$ + +void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm) +{ + nparm->Ct1 = c1; + nparm->Ct2 = c2; + nparm->kplus = kp; + int maxCT = 1; + if(nparm->Ct2 > nparm->Ct1) + { + maxCT = 2; + } + float ctFactor; + if(nparm->Ct1 == nparm->Ct2) + { + ctFactor = nparm->Ct1/2; + } + else if (maxCT == 1) + { + ctFactor = nparm->Ct1-nparm->Ct2/2; + } + else + { + ctFactor = nparm->Ct2-nparm->Ct1/2; + } + nparm->rlogc = R * log(ctFactor); + forbidden_entropy = nparm->rlogc; + nparm->kfac = 0.368 * log (nparm->kplus); + nparm->saltMethod = sm; + int x,y,a,b; // variables used as counters... + + // Set all parameters to zero! + memset(nparm->dH,0,sizeof(nparm->dH)); + memset(nparm->dS,0,sizeof(nparm->dS)); + + // Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small! + for (x=1;x<=4;x++) + { + for (y=1;y<=4;y++) + { + ndH(0,x,y,0)=forbidden_enthalpy; + ndS(0,x,y,0)=forbidden_entropy; + ndH(x,0,0,y)=forbidden_enthalpy; + ndS(x,0,0,y)=forbidden_entropy; + ndH(x,0,y,0)=forbidden_enthalpy; + ndS(x,0,y,0)=forbidden_entropy; + // forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap! + ndH(x,5,y,0)=forbidden_enthalpy; + ndS(x,5,y,0)=forbidden_entropy; + ndH(x,0,y,5)=forbidden_enthalpy; + ndS(x,0,y,5)=forbidden_entropy; + ndH(5,x,0,y)=forbidden_enthalpy; + ndS(5,x,0,y)=forbidden_entropy; + ndH(0,x,5,y)=forbidden_enthalpy; + ndS(0,x,5,y)=forbidden_entropy; + // forbid X$/-Y etc. + ndH(x,5,0,y)=forbidden_enthalpy; + ndS(x,5,0,y)=forbidden_entropy; + ndH(x,0,5,y)=forbidden_enthalpy; + ndS(x,0,5,y)=forbidden_entropy; + ndH(5,x,y,0)=forbidden_enthalpy; + ndS(5,x,y,0)=forbidden_entropy; + ndH(0,x,y,5)=forbidden_enthalpy; + ndS(0,x,y,5)=forbidden_entropy; + + } + // also, forbid x-/-- and --/x-, i.e. no two inner gaps paired + ndH(x,0,0,0)=forbidden_enthalpy; + ndS(x,0,0,0)=forbidden_entropy; + ndH(0,0,x,0)=forbidden_enthalpy; + ndS(0,0,x,0)=forbidden_entropy; + // x-/-$ + ndH(x,0,0,5)=forbidden_enthalpy; + ndS(x,0,0,5)=forbidden_entropy; + ndH(5,0,0,x)=forbidden_enthalpy; + ndS(5,0,0,x)=forbidden_entropy; + ndH(0,5,x,0)=forbidden_enthalpy; + ndS(x,0,0,5)=forbidden_entropy; + ndH(0,x,5,0)=forbidden_enthalpy; + ndS(0,x,5,0)=forbidden_entropy; + } + // forbid --/-- + ndH(0,0,0,0)=forbidden_enthalpy; + ndS(0,0,0,0)=forbidden_entropy; + + ndH(5,0,0,0)=forbidden_enthalpy; + ndS(5,0,0,0)=forbidden_entropy; + ndH(0,0,5,0)=forbidden_enthalpy; + ndS(0,0,5,0)=forbidden_entropy; + ndH(0,5,5,0)=forbidden_enthalpy; + ndS(0,5,5,0)=forbidden_entropy; + + // Interior loops (double Mismatches) + #define iloop_entropy -0.97f + #define iloop_enthalpy 0.0f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + for (a=1; a<=4; a++) + for (b=1; b<=4; b++) + // AT and CG pair, and as A=1, C=2, G=3, T=4 this means + // we have Watson-Crick pairs if (x+a==5) and (y+b)==5. + if (!((x+a==5)||(y+b==5))) + { + // No watson-crick-pair, i.e. double mismatch! + // set enthalpy/entropy to loop expansion! + ndH(x,y,a,b) = iloop_enthalpy; + ndS(x,y,a,b) = iloop_entropy; + } + + // xy/-- and --/xy (Bulge Loops of size > 1) + #define bloop_entropy -1.3f + #define bloop_enthalpy 0.0f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + { + ndH(x,y,0,0) = bloop_enthalpy; + ndS(x,y,0,0) = bloop_entropy; + ndH(0,0,x,y) = bloop_enthalpy; + ndS(0,0,x,y) = bloop_entropy; + } + + // x-/ya abd xa/y- as well as -x/ay and ax/-y + // bulge opening and closing parameters with + // adjacent matches / mismatches + // obulge_mism and cbulge_mism chosen so high to avoid + // AAAAAAAAA + // T--G----T + // being better than + // AAAAAAAAA + // TG------T + #define obulge_match_H (-2.66f * 1000) + #define obulge_match_S -14.22f + #define cbulge_match_H (-2.66f * 1000) + #define cbulge_match_S -14.22f + #define obulge_mism_H (0.0f * 1000) + #define obulge_mism_S -6.45f + #define cbulge_mism_H 0.0f + #define cbulge_mism_S -6.45f + for (x=1; x<=4; x++) + for (y=1; y<=4; y++) + for (a=1; a<=4; a++) + { + if (x+y==5) // other base pair matches! + { + ndH(x,0,y,a)=obulge_match_H; // bulge opening + ndS(x,0,y,a)=obulge_match_S; + ndH(x,a,y,0)=obulge_match_H; + ndS(x,a,y,0)=obulge_match_S; + ndH(0,x,a,y)=cbulge_match_H; // bulge closing + ndS(0,x,a,y)=cbulge_match_S; + ndH(a,x,0,y)=cbulge_match_H; + ndS(a,x,0,y)=cbulge_match_S; + } + else + { // mismatch in other base pair! + ndH(x,0,y,a)=obulge_mism_H; // bulge opening + ndS(x,0,y,a)=obulge_mism_S; + ndH(x,a,y,0)=obulge_mism_H; + ndS(x,a,y,0)=obulge_mism_S; + ndH(0,x,a,y)=cbulge_mism_H; // bulge closing + ndS(0,x,a,y)=cbulge_mism_S; + ndH(a,x,0,y)=cbulge_mism_H; + ndS(a,x,0,y)=cbulge_mism_S; + } + } + + // Watson-Crick pairs (note that only ten are unique, as obviously + // 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc. + ndH(1,1,4,4)=-7.6f*1000; ndS(1,1,4,4)=-21.3f; // AA/TT 04 + ndH(1,2,4,3)=-8.4f*1000; ndS(1,2,4,3)=-22.4f; // AC/TG adapted GT/CA + ndH(1,3,4,2)=-7.8f*1000; ndS(1,3,4,2)=-21.0f; // AG/TC adapted CT/GA + ndH(1,4,4,1)=-7.2f*1000; ndS(1,4,4,1)=-20.4f; // AT/TA 04 + ndH(2,1,3,4)=-8.5f*1000; ndS(2,1,3,4)=-22.7f; // CA/GT 04 + ndH(2,2,3,3)=-8.0f*1000; ndS(2,2,3,3)=-19.9f; // CC/GG adapted GG/CC + ndH(2,3,3,2)=-10.6f*1000; ndS(2,3,3,2)=-27.2f; // CG/GC 04 + ndH(2,4,3,1)=-7.8f*1000; ndS(2,4,3,1)=-21.0f; // CT/GA 04 + ndH(3,1,2,4)=-8.2f*1000; ndS(3,1,2,4)=-22.2f; // GA/CT 04 + ndH(3,2,2,3)=-9.8f*1000; ndS(3,2,2,3)=-24.4f; // GC/CG 04 + ndH(3,3,2,2)=-8.0f*1000; ndS(3,3,2,2)=-19.9f; // GG/CC 04 + ndH(3,4,2,1)=-8.4f*1000; ndS(3,4,2,1)=-22.4f; // GT/CA 04 + ndH(4,1,1,4)=-7.2f*1000; ndS(4,1,1,4)=-21.3f; // TA/AT 04 + ndH(4,2,1,3)=-8.2f*1000; ndS(4,2,1,3)=-22.2f; // TC/AG adapted GA/CT + ndH(4,3,1,2)=-8.5f*1000; ndS(4,3,1,2)=-22.7f; // TG/AC adapted CA/GT + ndH(4,4,1,1)=-7.6f*1000; ndS(4,4,1,1)=-21.3f; // TT/AA adapted AA/TT + + // A-C Mismatches (Values for pH 7.0) + ndH(1,1,2,4)=7.6f*1000; ndS(1,1,2,4)=20.2f; // AA/CT + ndH(1,1,4,2)=2.3f*1000; ndS(1,1,4,2)=4.6f; // AA/TC + ndH(1,2,2,3)=-0.7f*1000; ndS(1,2,2,3)=-3.8f; // AC/CG + ndH(1,2,4,1)=5.3f*1000; ndS(1,2,4,1)=14.6f; // AC/TA + ndH(1,3,2,2)=0.6f*1000; ndS(1,3,2,2)=-0.6f; // AG/CC + ndH(1,4,2,1)=5.3f*1000; ndS(1,4,2,1)=14.6f; // AT/CA + ndH(2,1,1,4)=3.4f*1000; ndS(2,1,1,4)=8.0f; // CA/AT + ndH(2,1,3,2)=1.9f*1000; ndS(2,1,3,2)=3.7f; // CA/GC + ndH(2,2,1,3)=5.2f*1000; ndS(2,2,1,3)=14.2f; // CC/AG + ndH(2,2,3,1)=0.6f*1000; ndS(2,2,3,1)=-0.6f; // CC/GA + ndH(2,3,1,2)=1.9f*1000; ndS(2,3,1,2)=3.7f; // CG/AC + ndH(2,4,1,1)=2.3f*1000; ndS(2,4,1,1)=4.6f; // CT/AA + ndH(3,1,2,2)=5.2f*1000; ndS(3,1,2,2)=14.2f; // GA/CC + ndH(3,2,2,1)=-0.7f*1000; ndS(3,2,2,1)=-3.8f; // GC/CA + ndH(4,1,1,2)=3.4f*1000; ndS(4,1,1,2)=8.0f; // TA/AC + ndH(4,2,1,1)=7.6f*1000; ndS(4,2,1,1)=20.2f; // TC/AA + + // C-T Mismatches + ndH(1,2,4,4)=0.7f*1000; ndS(1,2,4,4)=0.2f; // AC/TT + ndH(1,4,4,2)=-1.2f*1000; ndS(1,4,4,2)=-6.2f; // AT/TC + ndH(2,1,4,4)=1.0f*1000; ndS(2,1,4,4)=0.7f; // CA/TT + ndH(2,2,3,4)=-0.8f*1000; ndS(2,2,3,4)=-4.5f; // CC/GT + ndH(2,2,4,3)=5.2f*1000; ndS(2,2,4,3)=13.5f; // CC/TG + ndH(2,3,4,2)=-1.5f*1000; ndS(2,3,4,2)=-6.1f; // CG/TC + ndH(2,4,3,2)=-1.5f*1000; ndS(2,4,3,2)=-6.1f; // CT/GC + ndH(2,4,4,1)=-1.2f*1000; ndS(2,4,4,1)=-6.2f; // CT/TA + ndH(3,2,2,4)=2.3f*1000; ndS(3,2,2,4)=5.4f; // GC/CT + ndH(3,4,2,2)=5.2f*1000; ndS(3,4,2,2)=13.5f; // GT/CC + ndH(4,1,2,4)=1.2f*1000; ndS(4,1,2,4)=0.7f; // TA/CT + ndH(4,2,2,3)=2.3f*1000; ndS(4,2,2,3)=5.4f; // TC/CG + ndH(4,2,1,4)=1.2f*1000; ndS(4,2,1,4)=0.7f; // TC/AT + ndH(4,3,2,2)=-0.8f*1000; ndS(4,3,2,2)=-4.5f; // TG/CC + ndH(4,4,2,1)=0.7f*1000; ndS(4,4,2,1)=0.2f; // TT/CA + ndH(4,4,1,2)=1.0f*1000; ndS(4,4,1,2)=0.7f; // TT/AC + + // G-A Mismatches + ndH(1,1,3,4)=3.0f*1000; ndS(1,1,3,4)=7.4f; // AA/GT + ndH(1,1,4,3)=-0.6f*1000; ndS(1,1,4,3)=-2.3f; // AA/TG + ndH(1,2,3,3)=0.5f*1000; ndS(1,2,3,3)=3.2f; // AC/GG + ndH(1,3,3,2)=-4.0f*1000; ndS(1,3,3,2)=-13.2f; // AG/GC + ndH(1,3,4,1)=-0.7f*1000; ndS(1,3,4,1)=-2.3f; // AG/TA + ndH(1,4,3,1)=-0.7f*1000; ndS(1,4,3,1)=-2.3f; // AT/GA + ndH(2,1,3,3)=-0.7f*1000; ndS(2,1,3,3)=-2.3f; // CA/GG + ndH(2,3,3,1)=-4.0f*1000; ndS(2,3,3,1)=-13.2f; // CG/GA + ndH(3,1,1,4)=0.7f*1000; ndS(3,1,1,4)=0.7f; // GA/AT + ndH(3,1,2,3)=-0.6f*1000; ndS(3,1,2,3)=-1.0f; // GA/CG + ndH(3,2,1,3)=-0.6f*1000; ndS(3,2,1,3)=-1.0f; // GC/AG + ndH(3,3,1,2)=-0.7f*1000; ndS(3,3,1,2)=-2.3f; // GG/AC + ndH(3,3,2,1)=0.5f*1000; ndS(3,3,2,1)=3.2f; // GG/CA + ndH(3,4,1,1)=-0.6f*1000; ndS(3,4,1,1)=-2.3f; // GT/AA + ndH(4,1,1,3)=0.7f*1000; ndS(4,1,1,3)=0.7f; // TA/AG + ndH(4,3,1,1)=3.0f*1000; ndS(4,3,1,1)=7.4f; // TG/AA + + // G-T Mismatches + ndH(1,3,4,4)=1.0f*1000; ndS(1,3,4,4)=0.9f; // AG/TT + ndH(1,4,4,3)=-2.5f*1000; ndS(1,4,4,3)=-8.3f; // AT/TG + ndH(2,3,3,4)=-4.1f*1000; ndS(2,3,3,4)=-11.7f; // CG/GT + ndH(2,4,3,3)=-2.8f*1000; ndS(2,4,3,3)=-8.0f; // CT/GG + ndH(3,1,4,4)=-1.3f*1000; ndS(3,1,4,4)=-5.3f; // GA/TT + ndH(3,2,4,3)=-4.4f*1000; ndS(3,2,4,3)=-12.3f; // GC/TG + ndH(3,3,2,4)=3.3f*1000; ndS(3,3,2,4)=10.4f; // GG/CT + ndH(3,3,4,2)=-2.8f*1000; ndS(3,3,4,2)=-8.0f; // GG/TC +// ndH(3,3,4,4)=5.8f*1000; ndS(3,3,4,4)=16.3f; // GG/TT + ndH(3,4,2,3)=-4.4f*1000; ndS(3,4,2,3)=-12.3f; // GT/CG + ndH(3,4,4,1)=-2.5f*1000; ndS(3,4,4,1)=-8.3f; // GT/TA +// ndH(3,4,4,3)=4.1f*1000; ndS(3,4,4,3)=9.5f; // GT/TG + ndH(4,1,3,4)=-0.1f*1000; ndS(4,1,3,4)=-1.7f; // TA/GT + ndH(4,2,3,3)=3.3f*1000; ndS(4,2,3,3)=10.4f; // TC/GG + ndH(4,3,1,4)=-0.1f*1000; ndS(4,3,1,4)=-1.7f; // TG/AT + ndH(4,3,3,2)=-4.1f*1000; ndS(4,3,3,2)=-11.7f; // TG/GC +// ndH(4,3,3,4)=-1.4f*1000; ndS(4,3,3,4)=-6.2f; // TG/GT + ndH(4,4,1,3)=-1.3f*1000; ndS(4,4,1,3)=-5.3f; // TT/AG + ndH(4,4,3,1)=1.0f*1000; ndS(4,4,3,1)=0.9f; // TT/GA +// ndH(4,4,3,3)=5.8f*1000; ndS(4,4,3,3)=16.3f; // TT/GG + + // A-A Mismatches + ndH(1,1,1,4)=4.7f*1000; ndS(1,1,1,4)=12.9f; // AA/AT + ndH(1,1,4,1)=1.2f*1000; ndS(1,1,4,1)=1.7f; // AA/TA + ndH(1,2,1,3)=-2.9f*1000; ndS(1,2,1,3)=-9.8f; // AC/AG + ndH(1,3,1,2)=-0.9f*1000; ndS(1,3,1,2)=-4.2f; // AG/AC + ndH(1,4,1,1)=1.2f*1000; ndS(1,4,1,1)=1.7f; // AT/AA + ndH(2,1,3,1)=-0.9f*1000; ndS(2,1,3,1)=-4.2f; // CA/GA + ndH(3,1,2,1)=-2.9f*1000; ndS(3,1,2,1)=-9.8f; // GA/CA + ndH(4,1,1,1)=4.7f*1000; ndS(4,1,1,1)=12.9f; // TA/AA + + // C-C Mismatches + ndH(1,2,4,2)=0.0f*1000; ndS(1,2,4,2)=-4.4f; // AC/TC + ndH(2,1,2,4)=6.1f*1000; ndS(2,1,2,4)=16.4f; // CA/CT + ndH(2,2,2,3)=3.6f*1000; ndS(2,2,2,3)=8.9f; // CC/CG + ndH(2,2,3,2)=-1.5f*1000; ndS(2,2,3,2)=-7.2f; // CC/GC + ndH(2,3,2,2)=-1.5f*1000; ndS(2,3,2,2)=-7.2f; // CG/CC + ndH(2,4,2,1)=0.0f*1000; ndS(2,4,2,1)=-4.4f; // CT/CA + ndH(3,2,2,2)=3.6f*1000; ndS(3,2,2,2)=8.9f; // GC/CC + ndH(4,2,1,2)=6.1f*1000; ndS(4,2,1,2)=16.4f; // TC/AC + + // G-G Mismatches + ndH(1,3,4,3)=-3.1f*1000; ndS(1,3,4,3)=-9.5f; // AG/TG + ndH(2,3,3,3)=-4.9f*1000; ndS(2,3,3,3)=-15.3f; // CG/GG + ndH(3,1,3,4)=1.6f*1000; ndS(3,1,3,4)=3.6f; // GA/GT + ndH(3,2,3,3)=-6.0f*1000; ndS(3,2,3,3)=-15.8f; // GC/GG + ndH(3,3,2,3)=-6.0f*1000; ndS(3,3,2,3)=-15.8f; // GG/CG + ndH(3,3,3,2)=-4.9f*1000; ndS(3,3,3,2)=-15.3f; // GG/GC + ndH(3,4,3,1)=-3.1f*1000; ndS(3,4,3,1)=-9.5f; // GT/GA + ndH(4,3,1,3)=1.6f*1000; ndS(4,3,1,3)=3.6f; // TG/AG + + // T-T Mismatches + ndH(1,4,4,4)=-2.7f*1000; ndS(1,4,4,4)=-10.8f; // AT/TT + ndH(2,4,3,4)=-5.0f*1000; ndS(2,4,3,4)=-15.8f; // CT/GT + ndH(3,4,2,4)=-2.2f*1000; ndS(3,4,2,4)=-8.4f; // GT/CT + ndH(4,1,4,4)=0.2f*1000; ndS(4,1,4,4)=-1.5f; // TA/TT + ndH(4,2,4,3)=-2.2f*1000; ndS(4,2,4,3)=-8.4f; // TC/TG + ndH(4,3,4,2)=-5.0f*1000; ndS(4,3,4,2)=-15.8f; // TG/TC + ndH(4,4,1,4)=0.2f*1000; ndS(4,4,1,4)=-1.5f; // TT/AT + ndH(4,4,4,1)=-2.7f*1000; ndS(4,4,4,1)=-10.8f; // TT/TA + + // Dangling Ends + ndH(5,1,1,4)=-0.7f*1000; ndS(5,1,1,4)=-0.8f; // $A/AT + ndH(5,1,2,4)=4.4f*1000; ndS(5,1,2,4)=14.9f; // $A/CT + ndH(5,1,3,4)=-1.6f*1000; ndS(5,1,3,4)=-3.6f; // $A/GT + ndH(5,1,4,4)=2.9f*1000; ndS(5,1,4,4)=10.4f; // $A/TT + ndH(5,2,1,3)=-2.1f*1000; ndS(5,2,1,3)=-3.9f; // $C/AG + ndH(5,2,2,3)=-0.2f*1000; ndS(5,2,2,3)=-0.1f; // $C/CG + ndH(5,2,3,3)=-3.9f*1000; ndS(5,2,3,3)=-11.2f; // $C/GG + ndH(5,2,4,3)=-4.4f*1000; ndS(5,2,4,3)=-13.1f; // $C/TG + ndH(5,3,1,2)=-5.9f*1000; ndS(5,3,1,2)=-16.5f; // $G/AC + ndH(5,3,2,2)=-2.6f*1000; ndS(5,3,2,2)=-7.4f; // $G/CC + ndH(5,3,3,2)=-3.2f*1000; ndS(5,3,3,2)=-10.4f; // $G/GC + ndH(5,3,4,2)=-5.2f*1000; ndS(5,3,4,2)=-15.0f; // $G/TC + ndH(5,4,1,1)=-0.5f*1000; ndS(5,4,1,1)=-1.1f; // $T/AA + ndH(5,4,2,1)=4.7f*1000; ndS(5,4,2,1)=14.2f; // $T/CA + ndH(5,4,3,1)=-4.1f*1000; ndS(5,4,3,1)=-13.1f; // $T/GA + ndH(5,4,4,1)=-3.8f*1000; ndS(5,4,4,1)=-12.6f; // $T/TA + ndH(1,5,4,1)=-2.9f*1000; ndS(1,5,4,1)=-7.6f; // A$/TA + ndH(1,5,4,2)=-4.1f*1000; ndS(1,5,4,2)=-13.0f; // A$/TC + ndH(1,5,4,3)=-4.2f*1000; ndS(1,5,4,3)=-15.0f; // A$/TG + ndH(1,5,4,4)=-0.2f*1000; ndS(1,5,4,4)=-0.5f; // A$/TT + ndH(1,1,5,4)=0.2f*1000; ndS(1,1,5,4)=2.3f; // AA/$T + ndH(1,1,4,5)=-0.5f*1000; ndS(1,1,4,5)=-1.1f; // AA/T$ + ndH(1,2,5,3)=-6.3f*1000; ndS(1,2,5,3)=-17.1f; // AC/$G + ndH(1,2,4,5)=4.7f*1000; ndS(1,2,4,5)=14.2f; // AC/T$ + ndH(1,3,5,2)=-3.7f*1000; ndS(1,3,5,2)=-10.0f; // AG/$C + ndH(1,3,4,5)=-4.1f*1000; ndS(1,3,4,5)=-13.1f; // AG/T$ + ndH(1,4,5,1)=-2.9f*1000; ndS(1,4,5,1)=-7.6f; // AT/$A + ndH(1,4,4,5)=-3.8f*1000; ndS(1,4,4,5)=-12.6f; // AT/T$ + ndH(2,5,3,1)=-3.7f*1000; ndS(2,5,3,1)=-10.0f; // C$/GA + ndH(2,5,3,2)=-4.0f*1000; ndS(2,5,3,2)=-11.9f; // C$/GC + ndH(2,5,3,3)=-3.9f*1000; ndS(2,5,3,3)=-10.9f; // C$/GG + ndH(2,5,3,4)=-4.9f*1000; ndS(2,5,3,4)=-13.8f; // C$/GT + ndH(2,1,5,4)=0.6f*1000; ndS(2,1,5,4)=3.3f; // CA/$T + ndH(2,1,3,5)=-5.9f*1000; ndS(2,1,3,5)=-16.5f; // CA/G$ + ndH(2,2,5,3)=-4.4f*1000; ndS(2,2,5,3)=-12.6f; // CC/$G + ndH(2,2,3,5)=-2.6f*1000; ndS(2,2,3,5)=-7.4f; // CC/G$ + ndH(2,3,5,2)=-4.0f*1000; ndS(2,3,5,2)=-11.9f; // CG/$C + ndH(2,3,3,5)=-3.2f*1000; ndS(2,3,3,5)=-10.4f; // CG/G$ + ndH(2,4,5,1)=-4.1f*1000; ndS(2,4,5,1)=-13.0f; // CT/$A + ndH(2,4,3,5)=-5.2f*1000; ndS(2,4,3,5)=-15.0f; // CT/G$ + ndH(3,5,2,1)=-6.3f*1000; ndS(3,5,2,1)=-17.1f; // G$/CA + ndH(3,5,2,2)=-4.4f*1000; ndS(3,5,2,2)=-12.6f; // G$/CC + ndH(3,5,2,3)=-5.1f*1000; ndS(3,5,2,3)=-14.0f; // G$/CG + ndH(3,5,2,4)=-4.0f*1000; ndS(3,5,2,4)=-10.9f; // G$/CT + ndH(3,1,5,4)=-1.1f*1000; ndS(3,1,5,4)=-1.6f; // GA/$T + ndH(3,1,2,5)=-2.1f*1000; ndS(3,1,2,5)=-3.9f; // GA/C$ + ndH(3,2,5,3)=-5.1f*1000; ndS(3,2,5,3)=-14.0f; // GC/$G + ndH(3,2,2,5)=-0.2f*1000; ndS(3,2,2,5)=-0.1f; // GC/C$ + ndH(3,3,5,2)=-3.9f*1000; ndS(3,3,5,2)=-10.9f; // GG/$C + ndH(3,3,2,5)=-3.9f*1000; ndS(3,3,2,5)=-11.2f; // GG/C$ + ndH(3,4,5,1)=-4.2f*1000; ndS(3,4,5,1)=-15.0f; // GT/$A + ndH(3,4,2,5)=-4.4f*1000; ndS(3,4,2,5)=-13.1f; // GT/C$ + ndH(4,5,1,1)=0.2f*1000; ndS(4,5,1,1)=2.3f; // T$/AA + ndH(4,5,1,2)=0.6f*1000; ndS(4,5,1,2)=3.3f; // T$/AC + ndH(4,5,1,3)=-1.1f*1000; ndS(4,5,1,3)=-1.6f; // T$/AG + ndH(4,5,1,4)=-6.9f*1000; ndS(4,5,1,4)=-20.0f; // T$/AT + ndH(4,1,5,4)=-6.9f*1000; ndS(4,1,5,4)=-20.0f; // TA/$T + ndH(4,1,1,5)=-0.7f*1000; ndS(4,1,1,5)=-0.7f; // TA/A$ + ndH(4,2,5,3)=-4.0f*1000; ndS(4,2,5,3)=-10.9f; // TC/$G + ndH(4,2,1,5)=4.4f*1000; ndS(4,2,1,5)=14.9f; // TC/A$ + ndH(4,3,5,2)=-4.9f*1000; ndS(4,3,5,2)=-13.8f; // TG/$C + ndH(4,3,1,5)=-1.6f*1000; ndS(4,3,1,5)=-3.6f; // TG/A$ + ndH(4,4,5,1)=-0.2f*1000; ndS(4,4,5,1)=-0.5f; // TT/$A + ndH(4,4,1,5)=2.9f*1000; ndS(4,4,1,5)=10.4f; // TT/A$ + + return; +} + +void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2) +{ + float l1 = strlen(s1); + float l2 = strlen(s2); + if(l1gcContent = nparam_CountGCContent(s1); + nparm->gcContent /= (l1-2); + } else if (l1>l2) { + nparm->gcContent = nparam_CountGCContent(s2); + nparm->gcContent /= (l2-2); + } else { + nparm->gcContent = nparam_CountGCContent(s1)+nparam_CountGCContent(s2); + nparm->gcContent /= (l1+l2-4); + } +} + +float nparam_CountGCContent(char * seq ) { + int lseq = strlen(seq); + int k; + float count = 0; + for( k=0;k T + return 4; + if (mychar==2) // C -> G + return 3; + if (mychar==3) // G -> T + return 2; + if (mychar==4) // T -> A + return 1; + if (mychar==5) // $ -> $ + return 5; + if (!asnum) + { + if (mychar=='A') // A -> T + return 'T'; + if (mychar=='C') // C -> G + return 'G'; + if (mychar=='G') // G -> T + return 'C'; + if (mychar=='T') // T -> A + return 'A'; + if (mychar=='$') // $ -> $ + return '$'; + } + else + { + if (mychar=='A') // A -> T + return 4; + if (mychar=='C') // C -> G + return 3; + if (mychar=='G') // G -> T + return 2; + if (mychar=='T') // T -> A + return 1; + if (mychar=='$') // $ -> $ + return 5; + } + return '*'; +} + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: static bool CNNParams::isMismatch(char,char) +// +// PURPOSE: Return true if char1 and char2 are not watson-crick pair +// +// PARAMETERS: +// char1 - first character +// char2 - second character +// +// RETURN VALUE: +// bool - true if char1,char2 are Watson-Crick, false otw +// +// REVISION HISTORY +// $ 00sep28 : created LK +// #$ + +int nparam_isMismatch(char c1,char c2) +{ + if (c1+c2==5) + return 0; + else + return 1; +} + + + char nparam_convertNum(char c) + { + if (c=='A') + return 1; + if (c=='C') + return 2; + if (c=='G') + return 3; + if (c=='T') + return 4; + if (c=='$') + return 5; + return 0; + } + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::GetEntropy(char,char,char,char) + // + // PURPOSE: Retrieve Entropy for given NN-Pair from parameter table + // + // PARAMETERS: x0,x1,y0,y1: Pairs to look up in form + // 5'-x0-x1-3' / 3'-y0-y1-5' + // + // RETURN VALUE: + // float: Entropy dS for given NN pair + // + // REVISION HISTORY + // $ 00sep06 : created LK + // 00dec29 : included dangling end parameters + // 01feb07 : rewritten. looks ugly now, but is FAST! inline. + // #$ + + float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1) + { + char nx0=nparam_convertNum(x0); + char nx1=nparam_convertNum(x1); + char ny0=nparam_convertNum(y0); + char ny1=nparam_convertNum(y1); + float answer = ndS(nx0,nx1,ny0,ny1); + //Salt correction Santalucia + if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) { + if(nx0!=5 && 1<= nx1 && nx1<=4) { + answer += 0.5*nparm->kfac; + } + if(ny1!=5 && 1<= ny0 && ny0<=4) { + answer += 0.5*nparm->kfac; + } + } + //Salt correction Owczarzy + if (nparm->saltMethod == SALT_METHOD_OWCZARZY) { + float logk = log(nparm->kplus); + answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk); + } + return answer; + } + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::GetEnthalpy(char,char,char,char) + // + // PURPOSE: Retrieve Enthalpy for given NN-Pair from parameter table + // + // PARAMETERS: x0,x1,y0,y1: Pairs to look up in form + // 5'-x0-x1-3' / 3'-y0-y1-5' + // + // RETURN VALUE: + // float: Enthalpy dH for given NN pair + // + // REVISION HISTORY + // $ 00sep06 : created LK + // $ 00dec29 : included dangling end parameters + // $ 01feb07 : rewritten. looks ugly now, but is FAST! inline. + // #$ + + float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1) + { + char nx0=nparam_convertNum(x0); + char nx1=nparam_convertNum(x1); + char ny0=nparam_convertNum(y0); + char ny1=nparam_convertNum(y1); + return ndH(nx0,nx1,ny0,ny1); + } + + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::CalcTM(float entropy,float enthalpy) + // + // PURPOSE: Return melting temperature TM for given entropy and enthalpy + // Assuming a one-state transition and using the formula + // TM = dH / (dS + R ln(Ct/4)) + // entropy = dS + R ln Ct/4 (must already be included!) + // enthaklpy = dH + // where + // dH = enthalpy + // dS = entropy + // R = Boltzmann factor + // Ct = Strand Concentration + // + // PARAMETERS: + // + // RETURN VALUE: + // + // REVISION HISTORY + // $ 00sep06 : created LK + // $ 01jan07 : modified and corrected + // $ 01feb07 : optimized!!! inline + // $ 01feb09 : changed to include r ln ct in entropy!!! + // #$ + + float nparam_CalcTM(float entropy,float enthalpy) + { + float tm = 0; // absolute zero - return if model fails! + if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact)) + return 0; + if (entropy<0) // avoid division by zero and model errors! + { + tm = enthalpy/entropy;// - kfac; //LKFEB + if (tm<0) + return 0; + } + return tm; + } + + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: void CNNParams::AlterTM(float) + // + // PURPOSE: TM can be altered by a new assignment + // + // PARAMETERS: tm_new: new value for TM + // + // RETURN VALUE: + // + // REVISION HISTORY + // + // #$ + + void nparam_AlterTM(PNNParams nparm, float tm_new) + { + nparm->new_TM = tm_new; + return; + } + + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::CalcG(float entropy, float enthalpy) + // + // PURPOSE: return free energy G for given entropy and enthalpy + // Assuming a one-state transition and using the formula + // G = dH - new_TM * dS + // dH = enthalpy + // dS = entropy + // new_TM = value for the optimal melting temperature of + // the last iteration + // + // PARAMETERS: entropy and enthalpy + // + // RETURN VALUE: free Energy value (dG) for the + // + // REVISION HISTORY + // + // #$ + + float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy) + { + float freeEnergy = -999999999; + if (enthalpy>=forbidden_enthalpy) + return -999999999; + if (entropy<0) // avoid division by zero and model errors! + { + entropy = entropy * -1; + enthalpy = enthalpy * -1; + freeEnergy = enthalpy - nparm->new_TM * entropy; + } + return freeEnergy; + } + + + + + /////////////////////////////////////////////////////////////////////////////// + // inline function + /////////////////////////////////////////////////////////////////////////////// + // FUNCTION: float CNNParams::CalcSelfTM(char*) + // + // PURPOSE: Calculate TM for given sequence against its complement + // + // PARAMETERS: + // char* - Sequence to consider + // + // RETURN VALUE: + // float - Melting temperature in degrees Kelvin + // + // REVISION HISTORY + // $ 01jan09 LK : created + // $ 01feb07 LK : inline. + // #$ + + float nparam_CalcSelfTM(PNNParams nparm, char* seq) + { + float thedH = 0; + float thedS = nparam_GetInitialEntropy(nparm); + char c1; + char c2; + char c3; + char c4; + unsigned int i; + for ( i=1;i5) + { + if (c3=='A') + c3=1; + if (c3=='C') + c3=2; + if (c3=='G') + c3=3; + if (c3=='T') + c3=4; + if (c3=='$') + c3=5; + } + if (c4>5) + { + if (c4=='A') + c4=1; + if (c4=='C') + c4=2; + if (c4=='G') + c4=3; + if (c4=='T') + c4=4; + if (c4=='$') + c4=5; + } + + thedH += nparam_GetEnthalpy(nparm, c3,c4,c1,c2); + thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2); + } + + return nparam_CalcTM(thedS,thedH); + } + + float nparam_GetInitialEntropy(PNNParams nparm) + { + return -5.9f+nparm->rlogc; + } + + diff --git a/src/libthermo/nnparams.h b/src/libthermo/nnparams.h new file mode 100644 index 0000000..01948c7 --- /dev/null +++ b/src/libthermo/nnparams.h @@ -0,0 +1,104 @@ +/* + * nnparams.h + * PHunterLib + * + * Created by Tiayyba Riaz on 7/2/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//============================================================================= +// Module: nnparams.h +// Project: Diploma Thesis - Probe Selection for DNA Microarrays +// Type: header file - Nearest Neighbor Parameters / Model. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class CNNParams - Nearest Neighbor Model Parameters +// Author: kaderali +// Date: 9-12/2000 +// Copyright: (c) L. Kaderali, 9/2000 - 12/2000 +// +// Revision History +// $ 00sep07 LK : created +// 00dec29 LK : changed to include dangling end data +// 01jan09 LK : included CalcSelfTM function +// 01feb07 LK : optimized +// #$ +//============================================================================= + +#if !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_) +#define AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include +#include + +#ifdef _pack +#pragma pack(1) +#endif + + +// following defines to simplify coding... +#define ndH(a,b,c,d) nparm->dH[a][b][c][d] +#define ndS(a,b,c,d) nparm->dS[a][b][c][d] +#define forbidden_enthalpy 1000000000000000000.0f +//#define forbidden_enthalpy_div1000 1000000000000000.0f +// forbidden entropy=-rlogc +// #define forbidden_entropy 30.205986374220235304486574573422f +// Boltzmann factor (cal/degrees C*mol) +#define R 1.987f +#define SALT_METHOD_SANTALUCIA 1 +#define SALT_METHOD_OWCZARZY 2 +// Strand concentration (assumption!) (M) +// #define Ct 0.000001f +// r*ln(ct/4) as required by many formulas +//#define rlogc -30.205986374220235304486574573422f +extern float forbidden_entropy; + +//----------------------------------------------------------------------------- +// class CNNParams +//typedef class CNNParams* PNNParams; + +//#pragma GCC visibility push(hidden) +typedef struct CNNParams_st +{ +//public: + float Ct1; + float Ct2; + float rlogc; + float kplus; + float kfac; + int saltMethod; + float gcContent; + float new_TM; // ge‰ndert von ML!!! + //CNNParams(); + //virtual ~CNNParams(); +//private: + float dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign) + float dS[6][6][6][6]; +}CNNParams, * PNNParams; + + //void nparam_InitParams(PNNParams nparm, float c1=0.000001f, float c2=0.000001f, float kp=1, int sm = SALT_METHOD_SANTALUCIA ); + void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm); + void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2); + float nparam_CountGCContent(char * seq ); + char nparam_convertNum(char c); + float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1); + float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1); + float nparam_CalcTM(float entropy,float enthalpy); + void nparam_AlterTM(PNNParams nparm, float tm_new); + float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy); + float nparam_CalcSelfTM(PNNParams nparm, char* seq); + float nparam_GetInitialEntropy(PNNParams nparm) ; + char nparam_getComplement(char mychar, int asnum); + int nparam_isMismatch(char,char); + + + +//#pragma GCC visibility pop +#endif // !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_) diff --git a/src/libthermo/thermalign.P b/src/libthermo/thermalign.P new file mode 100644 index 0000000..0b9b4f3 --- /dev/null +++ b/src/libthermo/thermalign.P @@ -0,0 +1,16 @@ +thermalign.o thermalign.P : thermalign.c /usr/include/math.h \ + /usr/include/architecture/i386/math.h /usr/include/sys/cdefs.h \ + /usr/include/assert.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/stdlib.h \ + /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h thermalign.h \ + nnparams.h /usr/include/string.h diff --git a/src/libthermo/thermalign.c b/src/libthermo/thermalign.c new file mode 100644 index 0000000..7dc7826 --- /dev/null +++ b/src/libthermo/thermalign.c @@ -0,0 +1,716 @@ +/* + * thermalign.cpp + * PHunterLib + * + * Created by Tiayyba Riaz on 7/2/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +//============================================================================= +// Module: thermalign.cpp +// Project: Diploma Thesis - Probe Selection for DNA Microarrays +// Type: implementation - Thermodynamic Alignment. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class CThermAlign - Thermodynamic Alignment Algorithm +// Author: kaderali +// Date: 9/2000 - 01/2001 +// Copyright: (c) L. Kaderali, 9/2000 - 01/2001 +// +// Revision History +// $ 00sep04 LK : created +// $ 00dec30 LK : changed to do local alignment of probe against +// one entire sequence +// $ 01feb07 LK : optimized! +// $ 01aug06 LK : corrected, included salt and concentration input; +// $ made true local alignment (problem with initial mismatches!) +// #$ +//============================================================================= + +#include +#include +//#include +#include +#include + +#include "thermalign.h" + + + #define dH(a,b,c) (*(thrma->dH+((b)*(thrma->maxseq1len+1)*3+(a)*3+(c)))) + #define dS(a,b,c) (*(thrma->dS+((b)*(thrma->maxseq1len+1)*3+(a)*3+(c)))) + + +/////////////////////////////////////////////////////////////////////////////// +// Construction/Destruction +/////////////////////////////////////////////////////////////////////////////// + +//------------------------ Constructor ---------------------------------------- +// Initialize Thermalign Object. This will allocate memory for the dynamic +// programming matrices for both entropy and enthalpy of the present +// alignment. The max length of the two sequences to align has to be passed +// to the routine. Also, an initialized CNNParams Object must be passed to +// CThermAlign. Object will be initialized only once, sequences can be +// passed to it later. + +//int s1lmax,s2lmax; // global requiered for DEBUGGING + +void therm_initCThermAlign(PThermAlign thrma, int ALength, int BLength, PNNParams myNNParams) { + // Create dynamic programming matrices for entropy and enthalpy. + // The first two dimensions are for the two strings respectively, the + // third dimensions differentiates the alignment according to its end: + // type 0 Alignments will be those that align x(i+1) with y(j+1), + // type 1 Alignments align x(i+1) with a gap + // type 2 Alignments align y(i+1) with a gap + // All three of these types must separately be considered to select the + // optimum alignment in the next iteration. + // The alignment algorithm proceeds analogous to the well known + // Smith-Waterman algorithm for global alignments. + + thrma->dH = (float*)calloc ((ALength+1)*(BLength+1)*3, sizeof (float)); + thrma->dS = (float*)calloc ((ALength+1)*(BLength+1)*3, sizeof (float)); +// s1lmax=ALength; +// s2lmax=BLength; + + if ((thrma->dH==NULL)||(thrma->dS==NULL)) + { + printf("Fatal Error: Out of memory!\n"); + exit(-1); + } + + thrma->NNParams = myNNParams; + thrma->seq1len = ALength; + thrma->maxseq1len = thrma->seq1len; + thrma->seq2len = BLength; + + // set dH / dS entries all to zero! + #ifdef _pack + // this is FAST, but works only if #pragma pack(1) has been set... + memset(dH,0,(ALength+1)*(BLength+1)*3*sizeof(float)); + memset(dS,0,(ALength+1)*(BLength+1)*3*sizeof(float)); + #else + // the following is slower, but works also when padding bytes have + // been inserted in the array for alignment by the compiler... + // time is not really critical here, as this initialization is done + // only once... + int a, b; + for (a=0;a<=ALength;a++) + for (b=0;b<=BLength;b++) + { + dH(a,b,0)=0.0f; + dH(a,b,1)=0.0f; + dH(a,b,2)=0.0f; + dS(a,b,0)=0.0f; + dS(a,b,1)=0.0f; + dS(a,b,2)=0.0f; + } + #endif + therm_InitBorder(thrma); // also need to do only once... +// forbidden_entropy = (-(NNParams->R)*(float)log((NNParams->Ct)/4.0f)); + return; +} + +//------------------------ Destructor ----------------------------------------- +// Free memory + +void therm_finiCThermAlign(PThermAlign thrma) +{ + free (thrma->dH); + free (thrma->dS); + return; +} + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void CThermAlign::InitStrings(char* s1, +// char* s2, +// int s1len, +// int s2len); +// +// PURPOSE: Initialize strings for alignment table! +// +// PARAMETERS: +// s1: Target sequence +// s2: Probe sequence +// s1len: Target sequence length +// s2len: Probe sequence length +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 01jan03 LK : created +// $ 01feb07 LK : removed maximum local stuff +// #$ + +void therm_InitStrings(PThermAlign thrma, char* s1,char* s2,int s1len ,int s2len,int minxpos) +{ + thrma->seq1=s1; thrma->seq1len=s1len; + thrma->seq2=s2; thrma->seq2len=s2len; + nparam_UpdateParams(thrma->NNParams, s1,s2); +/* lk01feb07: removed maxloc-stuff + // if maximum local is not in reused subtable, need to reset! + if ((maxlocj>=minxpos)||(maxloci!=seq1len)) + { + maxloci=0; maxlocj=0; maxloctm=0; + } +*/ +// InitBorder(); //lk01feb07: removed, as it suffices if this is done once in constructor + return; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void CThermAlign::InitBorder(); +// +// PURPOSE: Initialize the lower and left border of the dynamic +// programming table. +// +// PARAMETERS: +// none +// +// RETURN VALUE: +// void +// +// REVISION HISTORY +// $ 00sep06 LK : created +// $ 00dec30 LK : complete revision - simply init table now, no +// more saving etc... +// $ 01feb07 LK : removed local alignment stuff and optimized +// #$ + +void therm_InitBorder(PThermAlign thrma) +{ + // Initialization for new alignment! + thrma->maxloctm = 0; // maximum melting temperature found (kelvin) + thrma->maxloci = 0; + thrma->maxlocj = 0; + thrma->maxloct = 0; + int i, j; + + for ( i=0; i<=thrma->seq1len; i++) + { + dH(i,0,0) = 0.0f; dH(i,0,1) = 0.0f; dH(i,0,2) = 0.0f; + dS(i,0,0) = nparam_GetInitialEntropy(thrma->NNParams); dS(i,0,1) = nparam_GetInitialEntropy(thrma->NNParams); + dS(i,0,2) = nparam_GetInitialEntropy(thrma->NNParams); + } + for ( j=1; j<=thrma->seq2len; j++) + { + dH(0,j,0) = 0.0f; dH(0,j,1) = 0.0f; dH(0,j,2) = 0.0f; + dS(0,j,0) = nparam_GetInitialEntropy(thrma->NNParams); dS(0,j,1) = nparam_GetInitialEntropy(thrma->NNParams); + dS(0,j,2) = nparam_GetInitialEntropy(thrma->NNParams); + } + + return; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: void CThermAlign::CalculateCell(int i, int j); +// +// PURPOSE: Calculate entropy and enthalpy for a given cell using a variant +// of the dynamic programming recursion, choosing greedy for +// tm->max +// x(i,j,0) = min/max {x(i-1,j-1,t) | for t in {0,1,2}} + newnn +// x(i,j,1) = min/max {x(i-1,j-1,0) + newnn, x(i-1,j,1)} +// x(i,j,2) = min/max {x(i-1,j-1,0) + newnn, x(i, j-1,2)} +// where newnn is the contribution of the newly added nearest +// neighbor term. The underlying assumption is that two +// consecutive gaps do not contribute to x. (Can however be +// easily changed, as long as an affine relationship exists +// between the length of the gap and the associated "cost"). +// To estimate the melting temperature TM from Enthalpy and +// Entropy, the following formula is being used: +// TM = dH / (dS + R ln CT) +// where +// TM is the temperature at which 50% of the duplex +// are dissociated +// dH is the Enthalpy +// dS is the Entropy +// R is the gas constant +// CT is the concentration (assumed constant) +// and ln means the natural logarithm. +// One drawback is that the two separate computations of +// Entropy and Enthalpy may result in different alignments. +// We do not want that, and thus pick the one with the higher +// melting temperature TM. However, it must be warned that +// depending on the underlying nearest neighbor parameters this +// may only approximate the "true" optimum result! +// +// PARAMETERS: +// int i, +// int j - coordinates of the dp cell to calculate (=position in +// string). Requieres that (i-1,j), (i,j-1), (i-1,j-1) +// are known. +// +// RETURN VALUE: +// - will be written directly to dH/dS array in this object +// +// REVISION HISTORY +// $ 00sep06 LK : created +// $ 00dec30 LK : modified to align probe with full target (local +// alignment) +// $ 01feb07 LK : optimized and removed local alignment stuff +// $ 01feb08 LK : optimized further and merged into CalculateTable +// #$ +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: CThermAlign::CalculateTable(); +// +// PURPOSE: Compute the entire dynamic programming table cell by cell. +// Requieres that InitBorder has been called. +// Computations will be done in the usual rowwise manner. +// +// PARAMETERS: +// int startRow: Row that computation shall commence at! +// +// RETURN VALUE: +// void - Data will be written directly to dH and dS in this object +// +// REVISION HISTORY +// $ 00sep06 : created LK +// 00dec31 : modified to begin at a specified row! +// #$ + +void therm_CalculateTable(PThermAlign thrma, int startRow) +{ +// #define forbidden_enthalpy 1000000000000000000.0f + register int i; + register int iminusone; + register int j; + register int jminusone; + // Determine previous character! + register char prevchari; + register char prevcharj; + register char seq1char; + register char seq2char; + register float entropy1; + register float entropy2; + register float entropy3; + register float enthalpy1; + register float enthalpy2; + register float enthalpy3; + register float tm1; + register float tm2; + register float tm3; + float maxtm; +// ofstream dboutstream("debug.out"); +// assert(startRow>0); + for (j=startRow; j<=thrma->seq2len; j++) + { + if (j>1) + prevcharj = thrma->seq2[j-2]; + else + prevcharj = 0; + jminusone = j - 1; + seq2char = thrma->seq2[jminusone]; + for (i=1; i<=thrma->seq1len; i++) + { + // local variables + if (i>1) + prevchari = thrma->seq1[i-2]; + else + prevchari = 0; + iminusone = i - 1; + seq1char=thrma->seq1[iminusone]; + + // --------------- Upper cell: Alignment of seq1[i] with seg2[j] + entropy1 = dS(iminusone,jminusone,0); + enthalpy1 = dH(iminusone,jminusone,0) ; + entropy2 = dS(iminusone,jminusone,1); + enthalpy2 = dH(iminusone,jminusone,1); + entropy3 = dS(iminusone,jminusone,2); + enthalpy3 = dH(iminusone,jminusone,2); + if (enthalpy1NNParams, prevchari, seq1char, prevcharj, seq2char); + enthalpy1 += nparam_GetEnthalpy(thrma->NNParams, prevchari, seq1char, prevcharj, seq2char); + } + if (enthalpy2NNParams, prevchari, seq1char, 0, seq2char); // 0 is gap! + entropy2 += nparam_GetEntropy(thrma->NNParams, prevchari, seq1char, 0, seq2char); + } + if (enthalpy3NNParams, 0, seq1char, prevcharj, seq2char); + entropy3 += nparam_GetEntropy(thrma->NNParams, 0, seq1char, prevcharj, seq2char); + } + // choose optimum combination + tm1 = nparam_CalcTM(entropy1, enthalpy1); + tm2 = nparam_CalcTM(entropy2, enthalpy2); + tm3 = nparam_CalcTM(entropy3, enthalpy3); + if ((tm1>=tm2)&&(tm1>=tm3)) + { + dS(i,j,0) = entropy1; + dH(i,j,0) = enthalpy1; + maxtm=tm1; + } + else if (tm2>=tm3) + { + dS(i,j,0) = entropy2; + dH(i,j,0) = enthalpy2; + maxtm=tm2; + } + else + { + dS(i,j,0) = entropy3; + dH(i,j,0) = enthalpy3; + maxtm=tm3; + } + // set to zero if alignment so far is mismatches only (tm will be zero -> no need to reset maxtm)! + if (dH(i,j,0)==0) + dS(i,j,0)=nparam_GetInitialEntropy(thrma->NNParams); + // check if local maximum found! + if (((i==thrma->seq1len)||(j==thrma->seq2len))&&(maxtm>=thrma->maxloctm)) + { + thrma->maxloctm=maxtm; + thrma->maxloci=i; + thrma->maxlocj=j; + thrma->maxloct=0; + } + // set to zero if alignment so far is mismatches only! + if (dH(i,j,0)==0) + dS(i,j,0)=nparam_GetInitialEntropy(thrma->NNParams); + + // --------------- Middle cell: Alignment of seq1[i] with '-' + // following changes lk 01jan08 + // we do not allow $- in the beginning, therefore set to forbidden if + // j=1 + if (j==1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + // also, disallow -$ in the end, therefore forbid if j=seq2len-1 + else if (j==thrma->seq2len-1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + else + { + // end lk01jan08 + // -- entropy + entropy1 = dS(iminusone,j,0); + entropy2 = dS(iminusone,j,1); + enthalpy1 = dH(iminusone,j,0); + enthalpy2 = dH(iminusone,j,1); + if (enthalpy1NNParams, prevchari, seq1char, seq2char, 0); + enthalpy1 += nparam_GetEnthalpy(thrma->NNParams, prevchari, seq1char, seq2char, 0); + } + if (enthalpy2NNParams, prevchari, seq1char, 0, 0); + enthalpy2 += nparam_GetEnthalpy(thrma->NNParams, prevchari, seq1char, 0, 0); + } + } + // -- choose optimum combination + tm1 = nparam_CalcTM(entropy1, enthalpy1); + tm2 = nparam_CalcTM(entropy2, enthalpy2); + if (tm1>=tm2) + { + dS(i,j,1)=entropy1; + dH(i,j,1)=enthalpy1; + maxtm=tm1; + } + else + { + dS(i,j,1)=entropy2; + dH(i,j,1)=enthalpy2; + maxtm=tm2; + } + // check if local maximum found! + if (((i==thrma->seq1len)||(j==thrma->seq2len))&&(maxtm>=thrma->maxloctm)) + { + thrma->maxloctm=maxtm; + thrma->maxloci=i; + thrma->maxlocj=j; + thrma->maxloct=1; + } + // set to zero if alignment so far is mismatches only! + if (dH(i,j,1)==0) + dS(i,j,1)=nparam_GetInitialEntropy(thrma->NNParams); + + // --------------- Lower cell: Alignment of '-' with seq2[j] + // following changes lk 01jan08 + // we do not allow $- in the beginning, therefore set to forbidden if + // i=1 + if (i==1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + // also, disallow -$ in the end, therefore forbid if i=seq1len-1 + else if (i==thrma->seq1len-1) + { + enthalpy1=forbidden_enthalpy; enthalpy2=forbidden_enthalpy; + entropy1=forbidden_entropy; entropy2=forbidden_entropy; + } + else + { + // end lk01jan08 + entropy1 = dS(i,jminusone,0); + entropy2 = dS(i,jminusone,2); + enthalpy1 = dH(i,jminusone,0); + enthalpy2 = dH(i,jminusone,2); + if (enthalpy1NNParams, seq1char, 0, prevcharj, seq2char); + enthalpy1 += nparam_GetEnthalpy(thrma->NNParams, seq1char, 0, prevcharj, seq2char); + } + if (enthalpy2NNParams, 0, 0, prevcharj, seq2char); + enthalpy2 += nparam_GetEnthalpy(thrma->NNParams, 0, 0, prevcharj, seq2char); + } + } + // -- choose optimum combination + tm1 = nparam_CalcTM(entropy1, enthalpy1); + tm2 = nparam_CalcTM(entropy2, enthalpy2); + if (tm1>=tm2) + { + dS(i,j,2)=entropy1; + dH(i,j,2)=enthalpy1; + maxtm=tm1; + } + else + { + dS(i,j,2)=entropy2; + dH(i,j,2)=enthalpy2; + maxtm=tm2; + } + // check if local maximum found! + if (((i==thrma->seq1len)||(j==thrma->seq2len))&&(maxtm>=thrma->maxloctm)) + { + thrma->maxloctm=maxtm; + thrma->maxloci=i; + thrma->maxlocj=j; + thrma->maxloct=2; + } + // set to zero if alignment so far is mismatches only! + if (dH(i,j,2)==0) + dS(i,j,2)=nparam_GetInitialEntropy(thrma->NNParams); + + + /* + dboutstream<<"After cell ("<=tm1)&&(tm0>=tm2)) + entropy = dS(i,j,0); + else if ((tm1>=tm0)&&(tm1>=tm2)) + entropy = dS(i,j,1); + else + entropy = dS(i,j,2); + + return entropy; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetEnthalpy(int i, int j); +// +// PURPOSE: Return Enthalpy for given cell +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Enthalpy (dS) +// +// REVISION HISTORY +// $ 00sep23 : created LK +// #$ + +float therm_GetEnthalpy(PThermAlign thrma, int i, int j) +{ + float enthalpy; + + // first, need to determine optimum values for dG and dH + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(dS(i,j,0),dH(i,j,0)); + tm1 = nparam_CalcTM(dS(i,j,1),dH(i,j,1)); + tm2 = nparam_CalcTM(dS(i,j,2),dH(i,j,2)); + if ((tm0>=tm1)&&(tm0>=tm2)) + enthalpy = dH(i,j,0); + else if ((tm1>=tm0)&&(tm1>=tm2)) + enthalpy = dH(i,j,1); + else + enthalpy = dH(i,j,2); + + return enthalpy; +} + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergy(int i, int j); +// +// PURPOSE: Return free Energy for given cell at melting temp +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - free Energy value (dG) at melting temperature +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetFreeEnergy(PThermAlign thrma, int i, int j) +{ + return therm_GetFreeEnergyK(thrma, i,j,therm_GetMeltingTempK(thrma, i,j)); +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergyK(int i, int j, float t); +// +// PURPOSE: Return free Energy for given cell at given temperature +// +// PARAMETERS: +// i, j - Cell coordinates +// t - Temperature in Kelvin +// +// RETURN VALUE: +// float - free Energy value (dG) at given temperature (Kelvin) +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetFreeEnergyK(PThermAlign thrma, int i, int j, float t) +{ + // dG = dH - T * dS + + float entropy; + float enthalpy; + + // first, need to determine optimum values for dG and dH + entropy = therm_GetEntropy(thrma, i,j); + enthalpy = therm_GetEnthalpy(thrma, i,j); + + return enthalpy - t * entropy; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float GetFreeEnergyC(int i, int j, int t); +// +// PURPOSE: Return free Energy for given cell at given temperature +// +// PARAMETERS: +// i, j - Cell coordinates +// t - Temperature in Celsius +// +// RETURN VALUE: +// float - free Energy value (dG) at melting temperature +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetFreeEnergyC(PThermAlign thrma, int i, int j, float t) +{ + // dG = dH - T * dS + + float entropy; + float enthalpy; + + // first, need to determine optimum values for dG and dH + entropy = therm_GetEntropy(thrma, i,j); + enthalpy = therm_GetEnthalpy(thrma, i,j); + + return enthalpy - (t + 273.15f) * entropy; +} + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float CThermAlign::GetMeltingTempC(int i, int j) +// +// PURPOSE: Calculate TM for given cell. Return in Celsius. +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Melting Temperature TM +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetMeltingTempC(PThermAlign thrma, int i, int j) +{ + return therm_GetMeltingTempK(thrma, i,j) - 273.15f; +} + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION: float CThermAlign::GetMeltingTempK(int i, int j) +// +// PURPOSE: Calculate TM for given cell. Return in Kelvin. +// +// PARAMETERS: +// i, j - Cell coordinates +// +// RETURN VALUE: +// float - Melting Temperature TM +// +// REVISION HISTORY +// $ 00sep06 : created LK +// #$ + +float therm_GetMeltingTempK(PThermAlign thrma, int i, int j) +{ + float tm0, tm1, tm2; + tm0 = nparam_CalcTM(dS(i,j,0), dH(i,j,0)); + tm1 = nparam_CalcTM(dS(i,j,1), dH(i,j,1)); + tm2 = nparam_CalcTM(dS(i,j,2), dH(i,j,2)); + float maxtm; + if ((tm0>=tm1)&&(tm0>=tm2)) + maxtm=tm0; + else if ((tm1>=tm0)&&(tm1>=tm2)) + maxtm=tm1; + else + maxtm=tm2; + if (maxtm<0) + maxtm=0; + return maxtm; +} + diff --git a/src/libthermo/thermalign.h b/src/libthermo/thermalign.h new file mode 100644 index 0000000..65f368a --- /dev/null +++ b/src/libthermo/thermalign.h @@ -0,0 +1,105 @@ +//============================================================================= +// Module: thermalign.h +// Project: Diploma Thesis - Probe Selection for DNA Microarrays +// Type: header file - Thermodynamic Alignment. +// Language: c++ +// Compiler: microsoft visual c++ 6.0, unix/linux gcc +// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested) +// Database: none +// Description: class CThermAlign - Thermodynamic Alignment Algorithm +// Author: kaderali +// Date: 9/2000 - 12/2000 +// Copyright: (c) L. Kaderali, 9/2000 - 12/2000 +// +// Revision History +// $ 00sep04 LK : created +// $ 00dec30 LK : changed to do local alignment of probe against +// one entire sequence +// $ 01feb07 LK : optimized +// #$ +//============================================================================= + +#if !defined(AFX_THERMALIGN_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_) +#define AFX_THERMALIGN_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_ + +//#if _MSC_VER > 1000 +//#pragma once +//#endif // _MSC_VER > 1000 + +//#include +//#include +//#include +#include "nnparams.h" // Nearest Neighbor Parameters +//#include "../gsfxtree/gsfxtree.h" // Suffix Tree Stuff + +//using namespace std; + +//#ifdef _pack +//#pragma pack(1) +//#endif + +//----------------------------------------------------------------------------- +// class CThermAlign +//#pragma GCC visibility push(hidden) +//typedef class CThermAlign* PThermAlign; + +typedef struct CThermAlign_t +{ +//public: + #ifdef _output_alignment +/* void printAlignment(ostream &outputStream); + void OutputAlignment(ostream &outputStream); + bool OutputAlignment(ostream &outputStream, int, int, int, bool local=false); + void OutputLocalAlignment(ostream &outputStream); + void PrintDPTable(ostream&);*/ + #endif +// lk01feb07: removed maxlocstuff as not requiered by thermtreealign... + float maxloctm; // maximum local temperature found + int maxloci; // i position thereof + int maxlocj; // j position thereof + int maxloct; // and type of maximum alignment! + int targetNumber; // id number of target sequence +// PGSfxLeaf probeNode; // identifier for probe sequence + char *seq1; // Sequence 1 + char *seq2; // Sequence 2 + int seq1len; // Length of Sequence 1 + int seq2len; // Length of Sequence 2 + int maxseq1len; // length of longest target... +//private: + float *dH; // Dynamic Programming Table for Entropy + float *dS; // Dynamic Programming Table for Enthalpy + PNNParams NNParams; // Nearest Neighbor parameters +// float forbidden_entropy; +#ifdef _output_alignment + //ostrstream *s1aptr, *s2aptr, *atypptr; +// Used to buffer aligned sequences (on output) + //ostrstream s1align; + //ostrstream s2align; + //ostrstream aligntype; // insert, deletion, match, unmatch (for output) + // same for local alignment: + /* removed lk00jan08: use global instead! + ostrstream ls1align; + ostrstream ls2align; + ostrstream laligntype; // insert, deletion, match, unmatch (for output) + */ +#endif +}CThermAlign, * PThermAlign; +//#pragma GCC visibility pop + +float therm_GetEntropy(PThermAlign thrma, int, int); +float therm_GetEnthalpy(PThermAlign thrma, int, int); +float therm_GetFreeEnergy(PThermAlign thrma, int, int); +float therm_GetFreeEnergyK(PThermAlign thrma, int, int, float); +float therm_GetFreeEnergyC(PThermAlign thrma, int, int, float); +float therm_GetMeltingTempC(PThermAlign thrma, int, int); +float therm_GetMeltingTempK(PThermAlign thrma, int, int); +void therm_initCThermAlign(PThermAlign thrma, int, int, PNNParams); +void therm_finiCThermAlign(PThermAlign thrma); +//void therm_InitStrings(PThermAlign thrma, char*,char*,int,int,int=0); +void therm_InitStrings(PThermAlign thrma, char*,char*,int,int,int); +void therm_InitBorder(PThermAlign thrma); +//void therm_CalculateTable(PThermAlign thrma, int=1); +void therm_CalculateTable(PThermAlign thrma, int); + + +#endif // !defined(AFX_THERMALIGN_H__1B9227F7_82AB_11D4_9FFF_000000000000__INCLUDED_)