Update the code to account for the evolution of the language

This commit is contained in:
2023-06-29 12:12:46 +02:00
parent 73236c72a8
commit 92826de147
18 changed files with 146 additions and 199 deletions

14
.gitignore vendored
View File

@ -1,16 +1,14 @@
# /src/
*.o
/src/*.P
/src/*.a
/src/ecoPrimer
# /src/libecoPCR/
/src/libecoPCR/*.P
/src/libecoPCR/*.a
# /src/libecoprimer/
/src/libecoPCR/*.o
/src/libecoprimer/*.P
/src/libecoprimer/*.a
# /src/libthermo/
/src/libecoprimer/*.o
/src/libthermo/*.P
/src/libthermo/*.a
/src/libthermo/*.o
phylonorway.*

View File

@ -171,13 +171,9 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
bool_t good2=pair->p2->good;
bool_t goodtmp;
bool_t strand;
uint32_t i, j;
float temp;
CNNParams nnparams;
uint32_t i;
//nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
char *c;
const char *c;
char p1[32];
char p2[32];
@ -376,21 +372,20 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy, pecodnadb_t seqdb)
{
ppair_t* sortedpairs;
ppair_t* index;
// ppair_t* index;
ppairlist_t pl;
size_t i,j;
size_t count;
char *taxon[]={"taxon","taxa"};
ecotx_t *current_taxon;
//pairset pair_sets;
pairset *pset = NULL;
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
fprintf(stderr,"Total pair count : %d\n",pairs->count);
sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs");
index=sortedpairs;
// index=sortedpairs;
pl=pairs->first;
j=0;
while(pl->next)
@ -475,10 +470,10 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy,
if (options->filter_on_links)
{
fprintf (stderr, "Old size: %d, ", count);
fprintf (stderr, "Old size: %ld, ", count);
count = primers_changeSortedArray (&sortedpairs, count, options);
//count = primers_filterWithGivenLinks (&sortedpairs, count, options);
fprintf (stderr, "New size: %d\n", count);
fprintf (stderr, "New size: %ld\n", count);
if (count == 0)
{
@ -803,9 +798,9 @@ int main(int argc, char **argv)
if (options.saltmethod != 2) //if not SALT_METHOD_OWCZARZY
options.saltmethod = SALT_METHOD_SANTALUCIA; //then force SALT_METHOD_SANTALUCIA
if (options.salt < 0.01 || options.salt > 0.3) //if salt value out of literature values
if (options.salt < 0.01 || options.salt > 0.3) {//if salt value out of literature values
options.salt = DEF_SALT; //set to default
}
nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,options.salt,options.saltmethod);
fprintf(stderr,"Reading taxonomy database ...");
@ -851,7 +846,7 @@ int main(int argc, char **argv)
fprintf(stderr,"\nIndexing words in sequences\n");
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
fprintf(stderr,"\n Strict primer count : %lld\n",words->size);
/*/TR Testing
fprintf(stderr,"\nReducing for debugging\n");
@ -871,7 +866,7 @@ int main(int argc, char **argv)
if (options.no_multi_match)
{
(void)filterMultiStrictPrimer(words);
fprintf(stderr,"\n Strict primer with single match count : %d\n",words->size);
fprintf(stderr,"\n Strict primer with single match count : %lld\n",words->size);
}
@ -921,7 +916,7 @@ pwordcount_t reduce_words_to_debug (pwordcount_t words, poptions_t options)
{
uint32_t i, k;
pwordcount_t new_words;
char *rwrd;
const char *rwrd;
char dwrd[20];
/*char *strict_words[DEBUG_WORDS_CNT] = {"GAGTCTCTGCACCTATCC", "GCAATCCTGAGCCAAATC", "ACCCCTAACCACAACTCA",
"TCCGAACCGACTGATGTT", "GAAGCTTGGGTGAAACTA", "GGAGAACCAGCTAGCTCT", "GCTGGTTCTCCCCGAAAT",
@ -981,7 +976,7 @@ pwordcount_t reduce_words_to_debug (pwordcount_t words, poptions_t options)
void print_wordwith_positions (primer_t prm, uint32_t seqdbsize, poptions_t options)
{
char *wrd;
const char *wrd;
uint32_t i, j;
char *twrd = "GCCTGTTTACCAAAAACA";

View File

@ -1,5 +1,5 @@
MACHINE=MAC_OS_X
LIBPATH= -LlibecoPCR -Llibecoprimer -Llibthermo
LIBPATH= -LlibecoPCR -Llibecoprimer -Llibthermo -L/usr/local/lib
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
CC=gcc

View File

@ -2,7 +2,7 @@
#include <stdlib.h>
static int eco_log_malloc = 0;
static size_t eco_amount_malloc=0;
//static size_t eco_amount_malloc=0;
static size_t eco_chunk_malloc=0;
void eco_trace_memory_allocation()
@ -37,7 +37,7 @@ void *eco_malloc(int64_t chunksize,
if (eco_log_malloc)
fprintf(stderr,
"Memory segment located at %p of size %d is allocated (file : %s [%d])",
"Memory segment located at %p of size %lld is allocated (file : %s [%d])",
chunk,
chunksize,
filename,
@ -65,7 +65,7 @@ void *eco_realloc(void *chunk,
if (!newchunk)
{
fprintf(stderr,"Requested memory : %d\n",newsize);
fprintf(stderr,"Requested memory : %lld\n",newsize);
ecoError(ECO_MEM_ERROR,error_message,filename,line);
}
if (!chunk)
@ -73,7 +73,7 @@ void *eco_realloc(void *chunk,
if (eco_log_malloc)
fprintf(stderr,
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
"Old memory segment %p is reallocated at %p with a size of %lld (file : %s [%d])",
chunk,
newchunk,
newsize,

View File

@ -21,8 +21,8 @@ typedef struct {
int32_t taxid;
char AC[20];
int32_t DE_length;
int32_t SQ_length;
int32_t CSQ_length; /*what is this CSQ_length ? */
u_int32_t SQ_length;
u_int32_t CSQ_length; /*what is this CSQ_length ? */
char data[1];
@ -30,7 +30,7 @@ typedef struct {
typedef struct {
int32_t taxid;
int32_t SQ_length;
u_int32_t SQ_length;
int32_t isexample;
char *AC;
char *DE;

View File

@ -2,6 +2,7 @@
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>

View File

@ -1139,7 +1139,7 @@ void sets_by_SimulatedAnealing (pairset *pair_set,
{
pair_set = extend_set_randomly (NULL, &params, 3);
printf("\nStart Random seed set for Simulated :\n");
print_set_info (&pair_set, &params);
print_set_info (pair_set, &params);
}
min_spc = max_spc = pair_set->set_specificity;
min_cov = max_cov = pair_set->set_coverage;
@ -1566,7 +1566,8 @@ int32_t *addinset (int32_t *set, int32_t i, int32_t j, int32_t* slots, int32_t *
size_t primers_changeSortedArray (ppair_t ** pairs,
size_t sorted_count, poptions_t options)
{
int32_t i, j, k, l, total_links;
int32_t i, k, total_links;
u_int32_t j;
int *owi;
int *iwi;
int allowedtaxa;
@ -1579,14 +1580,14 @@ size_t primers_changeSortedArray (ppair_t ** pairs,
idx_set = ECOMALLOC(slots*sizeof (int32_t),
"Could not allocate memory for index set.");
for (i=0; i<sorted_count; i++)
for (i=0; ((u_int32_t)i)<sorted_count; i++)
{
owi = sortedpairs[i]->wellIdentifiedSeqs;
passed = FALSE;
for (j=0; j<sorted_count; j++)
{
if (i == j) continue;
if ((u_int32_t)i == j) continue;
iwi = sortedpairs[j]->wellIdentifiedSeqs;
total_links = 0;
@ -1610,8 +1611,9 @@ size_t primers_changeSortedArray (ppair_t ** pairs,
if (options->max_links_percent > 0)
{
allowedtaxa = options->max_links_percent;
if (total_links > allowedtaxa)
if (total_links > allowedtaxa){
passed = TRUE;
}
break;
}
else
@ -1630,7 +1632,7 @@ size_t primers_changeSortedArray (ppair_t ** pairs,
for (j=0; j<sorted_count; j++)
{
for (k=0; k<index; k++)
if (j == idx_set[k]) break;
if (j == (u_int32_t)(idx_set[k])) break;
//need to remove this element
if (k == index)
{
@ -1711,9 +1713,9 @@ int32_t *addinset_withLinks (int32_t *set, int32_t i, int32_t* slots, int32_t *i
size_t primers_filterWithGivenLinks (ppair_t ** pairs,
size_t sorted_count, poptions_t options)
{
int32_t i, j, k;
int32_t i, k;
u_int32_t j;
ppair_t *sortedpairs = *pairs;
bool_t passed;
int32_t *idx_set = NULL;
int32_t slots=50, index=0;
@ -1732,7 +1734,7 @@ size_t primers_filterWithGivenLinks (ppair_t ** pairs,
for (j=0; j<sorted_count; j++)
{
for (k=0; k<index; k++)
if (j == idx_set[k]) break;
if (j == (u_int32_t) (idx_set[k])) break;
//need to remove this element
if (k == index)
{
@ -1755,7 +1757,7 @@ size_t primers_filterWithGivenLinks (ppair_t ** pairs,
else i=sorted_count;
for (j=0; j<i; j++)
for (j=0; j<(u_int32_t)i; j++)
for (k=0; k<options->dbsize; k++)
if ((*pairs)[j]->coveredSeqs[k] == 1)
cov[k] = 1;

View File

@ -213,7 +213,6 @@ pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsi
char *base;
int8_t code;
uint32_t goodPrimers=0;
static int iii=0;
//inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum);

View File

@ -36,7 +36,7 @@ int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat,
ppatternParam_t param,
StackiPtr stkpos)
{
int32_t pos;
u_int32_t pos;
uint32_t smask, r;
uint8_t *data;
int32_t end;
@ -84,7 +84,7 @@ int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
StackiPtr stkpos)
{
int e, found;
int32_t pos;
u_int32_t pos;
uint32_t smask, cmask, sindx;
uint32_t *pr, r[2 * MAX_PAT_ERR + 2];
uint8_t *data;

View File

@ -201,7 +201,7 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3
data[w].good = data[w].inexample >= inSequenceQuorum && data[w].outexample <= outSequenceQuorum;
goodPrimers+=data[w].good? 1:0;
fprintf(stderr,"Primers %5d/%d analyzed => sequence : %s in %d example and %d counterexample sequences \r",
fprintf(stderr,"Primers %5d/%lld analyzed => sequence : %s in %d example and %d counterexample sequences \r",
i+1,words->size,ecoUnhashWord(data[w].word,options->primer_length),
data[w].inexample,data[w].outexample);
@ -224,8 +224,8 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3
ECOFREE(data[w].reversePos,"Free direct count table");
}
fprintf(stderr,"\n\nOn %d analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
fprintf(stderr,"Conserved primers for further analysis : %d/%d\n",w,words->size);
fprintf(stderr,"\n\nOn %lld analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
fprintf(stderr,"Conserved primers for further analysis : %d/%lld\n",w,words->size);
primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table");
primers->primers=ECOREALLOC(data,

View File

@ -8,14 +8,6 @@
#ifndef HASHENCODER_H_
#define HASHENCODER_H_
static int8_t encoder[] = {0, // A
-1, // b
1, // C
-1,-1,-1, // d, e, f
2, // G
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s
3,3, // T,U
-1,-1,-1,-1,-1}; // v,w,x,y,z
extern int8_t encoder[];
#endif /* HASHENCODER_H_ */

View File

@ -12,6 +12,16 @@ static int cmpword(const void *x,const void *y);
#include "hashencoder.h"
int8_t encoder[] = {0, // A
-1, // b
1, // C
-1,-1,-1, // d, e, f
2, // G
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s
3,3, // T,U
-1,-1,-1,-1,-1}; // v,w,x,y,z
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
{
uint32_t wordcount;
@ -257,6 +267,6 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word)
char ecoComplementChar(char base)
{
return (base < 4)? !base & 3: 4;
return (base < 4)? ~base & 3: 4;
}

View File

@ -205,8 +205,8 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
}
else
{
uint32_t s;
s = strictprimers->size;
// uint32_t s;
// s = strictprimers->size;
// DEBUG_LOG("stack size : %u",s);
addSeqToWordCountTable(strictprimers,options->primer_length,
options->circular,
@ -223,7 +223,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
logfile = fopen(logfilename,"a");
seconde = timeval_subtract(&(usage.ru_utime),&(start.ru_utime)) +
timeval_subtract(&(usage.ru_stime),&(start.ru_stime));
fprintf(logfile,"%d\t%llu\t%lu\t%8.3f\t%8.3e\n",i,
fprintf(logfile,"%d\t%llu\t%llu\t%8.3f\t%8.3e\n",i,
(long long unsigned)totallength,
strictprimers->size*(sizeof(int64_t)+sizeof(int32_t)),
seconde,seconde/(double)totallength);
@ -248,9 +248,9 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
sizeof(word_t)*strictprimers->size,
"Cannot reallocate strict primer table");
if (neededWords)
if (neededWords){
ECOFREE(neededWords,"Clean needed word table");
}
//TR: Somehow for some primers strictcount value is extremely large hence invalid
//we need to remove these primers from the list
j = strictprimers->size+1;

View File

@ -231,8 +231,8 @@ static int cmpamp(const void *ampf1, const void* ampf2)
char cd1;
char cd2;
int len = 0;
char *ch1;
char *ch2;
const char *ch1;
const char *ch2;
int incr1;
int incr2;

View File

@ -17,6 +17,15 @@
double forbidden_entropy;
char bpencoder[] = { 1, // A
0, // b
2, // C
0,0,0, // d, e, f
3, // G
0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
4,0, // T,U
0,0,0,0,0}; // v,w,x,y,z
double nparam_GetInitialEntropy(PNNParams nparm)
{
@ -470,7 +479,7 @@ int nparam_CountGCContent(char * seq ) {
return count;
}
void nparam_CleanSeq (char* inseq, char* outseq, int len)
void nparam_CleanSeq (const char* inseq, char* outseq, int len)
{
int seqlen = strlen (inseq);
int i, j;
@ -508,7 +517,7 @@ void nparam_CleanSeq (char* inseq, char* outseq, int len)
}
//Calculate TM for given sequence against its complement
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
double nparam_CalcSelfTM(PNNParams nparm, const char* seq, size_t len)
{
double thedH = 0;
//double thedS = nparam_GetInitialEntropy(nparm);
@ -520,7 +529,7 @@ double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
char c4;
unsigned int i;
char nseq[50];
char *useq = seq;
const char *useq = seq;
nparam_CleanSeq (seq, nseq, len);
useq = nseq;
@ -533,7 +542,7 @@ double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
c4 = GETNUMCODE(useq[i]);
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedH += nparm->dH[(u_int8_t) c3][(u_int8_t)c4][(u_int8_t)c1][(u_int8_t)c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
//printf("------------------\n");
@ -543,7 +552,7 @@ double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
return mtemp;
}
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
double nparam_CalcTwoTM(PNNParams nparm, const char* seq1, const char* seq2, size_t len)
{
double thedH = 0;
//double thedS = nparam_GetInitialEntropy(nparm);
@ -575,7 +584,7 @@ double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
//fprintf (stderr,"Primer : %s %f %f %d %d, %d %d %f\n",useq,thedH,thedS,(int)c3,(int)c4,(int)c1,(int)c2,nparam_GetEnthalpy(nparm, c3,c4,c1,c2));
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedH += nparm->dH[(u_int8_t)c3][(u_int8_t)c4][(u_int8_t)c1][(u_int8_t)c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
//fprintf(stderr,"------------------\n");

View File

@ -16,8 +16,8 @@
//#include "../libecoprimer/ecoprimer.h"
// following defines to simplify coding...
#define ndH(a,b,c,d) nparm->dH[a][b][c][d]
#define ndS(a,b,c,d) nparm->dS[a][b][c][d]
#define ndH(a,b,c,d) nparm->dH[(u_int8_t) a][(u_int8_t) b][(u_int8_t) c][(u_int8_t) d]
#define ndS(a,b,c,d) nparm->dS[(u_int8_t) a][(u_int8_t) b][(u_int8_t) c][(u_int8_t) d]
#define forbidden_enthalpy 1000000000000000000.0f
#define R 1.987f
#define SALT_METHOD_SANTALUCIA 1
@ -33,14 +33,7 @@
extern double forbidden_entropy;
static char bpencoder[] = { 1, // A
0, // b
2, // C
0,0,0, // d, e, f
3, // G
0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
4,0, // T,U
0,0,0,0,0}; // v,w,x,y,z
extern char bpencoder[]; // v,w,x,y,z
typedef struct CNNParams_st
@ -62,8 +55,8 @@ int nparam_CountGCContent(char * seq );
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_CalcTM(double entropy,double enthalpy);
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);
double nparam_CalcSelfTM(PNNParams nparm, const char* seq, size_t len);
double nparam_CalcTwoTM(PNNParams nparm, const char* seq1, const char* seq2, size_t len);
double nparam_GetInitialEntropy(PNNParams nparm) ;
double calculateMeltingTemperatureBasic (char * seq);

View File

@ -28,17 +28,15 @@ word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand)
void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options)
{
size_t i, j,k,l;
size_t i, j;
uint32_t bp1,bp2;
uint32_t ep1,ep2;
word_t w1;
word_t w2;
bool_t strand;
char *sq,*sq1,*sq2,*c;
char *sq;
char prmrd[50];
char prmrr[50];
char sqsite[50];
double mtemp;
for (i = 0; i < count; i++)

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
import re
import gzip
@ -6,11 +6,8 @@ import struct
import sys
import time
import getopt
from functools import cmp_to_key
try:
import psycopg2
_dbenable=True
except ImportError:
_dbenable=False
#####
@ -80,15 +77,17 @@ class ColumnFile(object):
def __init__(self,stream,sep=None,strip=True,types=None):
if isinstance(stream,str):
self._stream = open(stream)
elif hasattr(stream,'next'):
self._stream = stream
else:
raise ValueError,'stream must be string or an iterator'
try:
iter(stream)
self._stream = stream
except TypeError:
raise ValueError('stream must be string or an iterator')
self._delimiter=sep
self._strip=strip
if types:
self._types=[x for x in types]
for i in xrange(len(self._types)):
for i in range(len(self._types)):
if self._types[i] is bool:
self._types[i]=ColumnFile.str2bool
else:
@ -103,14 +102,14 @@ class ColumnFile(object):
def __iter__(self):
return self
def next(self):
ligne = self._stream.next()
def __next__(self):
ligne = next(self._stream)
data = ligne.split(self._delimiter)
if self._strip or self._types:
data = [x.strip() for x in data]
if self._types:
it = endLessIterator(self._types)
data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
data = [x[1](x[0]) for x in ((y,next(it)) for y in data)]
return data
def taxonCmp(t1,t2):
@ -125,14 +124,14 @@ def bsearchTaxon(taxonomy,taxid):
begin = 0
end = taxCount
oldcheck=taxCount
check = begin + end / 2
check = int(begin + end / 2)
while check != oldcheck and taxonomy[check][0]!=taxid :
if taxonomy[check][0] < taxid:
begin=check
else:
end=check
oldcheck=check
check = (begin + end) / 2
check = int((begin + end) / 2)
if taxonomy[check][0]==taxid:
@ -152,22 +151,22 @@ def readNodeTable(file):
str,str,bool,
int,bool,int,
bool,bool,bool,str))
print >>sys.stderr,"Reading taxonomy dump file..."
print("Reading taxonomy dump file...", file=sys.stderr)
taxonomy=[[n[0],n[2],n[1]] for n in nodes]
print >>sys.stderr,"List all taxonomy rank..."
print("List all taxonomy rank...", file=sys.stderr)
ranks =list(set(x[1] for x in taxonomy))
ranks.sort()
ranks = dict(map(None,ranks,xrange(len(ranks))))
ranks = {rank: index for index, rank in enumerate(ranks)}
print >>sys.stderr,"Sorting taxons..."
taxonomy.sort(taxonCmp)
print("Sorting taxons...", file=sys.stderr)
taxonomy.sort(key=lambda x: x[0])
print >>sys.stderr,"Indexing taxonomy..."
print("Indexing taxonomy...", file=sys.stderr)
index = {}
for t in taxonomy:
index[t[0]]=bsearchTaxon(taxonomy, t[0])
print >>sys.stderr,"Indexing parent and rank..."
print("Indexing parent and rank...", file=sys.stderr)
for t in taxonomy:
t[1]=ranks[t[1]]
t[2]=index[t[2]]
@ -203,7 +202,7 @@ def deletedNodeIterator(file):
def readTaxonomyDump(taxdir):
taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
print >>sys.stderr,"Adding scientific name..."
print("Adding scientific name...", file=sys.stderr)
alternativeName=[]
for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
@ -211,66 +210,16 @@ def readTaxonomyDump(taxdir):
if classname == 'scientific name':
taxonomy[index[taxid]].append(name)
print >>sys.stderr,"Adding taxid alias..."
print("Adding taxid alias...", file=sys.stderr)
for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
index[taxid]=index[current]
print >>sys.stderr,"Adding deleted taxid..."
print("Adding deleted taxid...", file=sys.stderr)
for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
index[taxid]=None
return taxonomy,ranks,alternativeName,index
def readTaxonomyDB(dbname):
connection = psycopg2.connect(database=dbname)
cursor = connection.cursor()
cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
taxonomy=[list(x) for x in cursor]
cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
ranks=cursor.fetchall()
ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks))))
print >>sys.stderr,"Sorting taxons..."
taxonomy.sort(taxonCmp)
print >>sys.stderr,"Indexing taxonomy..."
index = {}
for t in taxonomy:
index[t[0]]=bsearchTaxon(taxonomy, t[0])
print >>sys.stderr,"Indexing parent and rank..."
for t in taxonomy:
t[1]=ranks[t[1]]
try:
t[2]=index[t[2]]
except KeyError,e:
if t[2] is None and t[0]==1:
t[2]=index[t[0]]
else:
raise e
cursor.execute("select taxid,name,category from ncbi_taxonomy.name")
alternativeName=[]
for taxid,name,classname in cursor:
alternativeName.append((name,classname,index[taxid]))
if classname == 'scientific name':
taxonomy[index[taxid]].append(name)
cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")
print >>sys.stderr,"Adding taxid alias..."
for taxid,current in cursor:
if current is not None:
index[taxid]=index[current]
else:
index[taxid]=None
return taxonomy,ranks,alternativeName,index
#####
#
#
@ -282,22 +231,27 @@ def readTaxonomyDB(dbname):
def entryIterator(file):
file = universalOpen(file)
rep =[]
for ligne in file:
ligne = file.readline()
while ligne:
rep.append(ligne)
if ligne == '//\n':
rep = ''.join(rep)
yield rep
rep = []
ligne = file.readline()
def fastaEntryIterator(file):
file = universalOpen(file)
rep =[]
for ligne in file:
ligne = file.readline()
while ligne:
if ligne[0] == '>' and rep:
rep = ''.join(rep)
yield rep
rep = []
rep.append(ligne)
ligne = file.readline()
if rep:
rep = ''.join(rep)
yield rep
@ -418,7 +372,7 @@ def taxonomyInfo(entry,connection):
def ecoSeqPacker(sq):
compactseq = gzip.zlib.compress(sq['sequence'],9)
compactseq = gzip.zlib.compress(bytes(sq['sequence'],"ascii"),9)
cptseqlength = len(compactseq)
delength = len(sq['definition'])
@ -427,11 +381,11 @@ def ecoSeqPacker(sq):
packed = struct.pack('> I I 20s I I I %ds %ds' % (delength,cptseqlength),
totalSize,
sq['taxid'],
sq['id'],
bytes(sq['id'],"ascii"),
delength,
len(sq['sequence']),
cptseqlength,
sq['definition'],
bytes(sq['definition'],"ascii"),
compactseq)
assert len(packed) == totalSize+4, "error in sequence packing"
@ -450,7 +404,7 @@ def ecoTaxPacker(tx):
tx[1],
tx[2],
namelength,
tx[3])
bytes(tx[3],"ascii"))
return packed
@ -460,7 +414,7 @@ def ecoRankPacker(rank):
packed = struct.pack('> I %ds' % namelength,
namelength,
rank)
bytes(rank, 'ascii'))
return packed
@ -476,8 +430,8 @@ def ecoNamePacker(name):
namelength,
classlength,
name[2],
name[0],
name[1])
bytes(name[0], 'ascii'),
bytes(name[1], 'ascii'))
return packed
@ -505,11 +459,11 @@ def ecoSeqWriter(file,input,taxindex,parser):
skipped.append(entry['id'])
where = universalTell(input)
progressBar(where, inputsize)
print >>sys.stderr," Readed sequences : %d " % seqcount,
print(" Readed sequences : %d " % seqcount, end=' ', file=sys.stderr)
else:
skipped.append(entry['id'])
print >>sys.stderr
print(file=sys.stderr)
output.seek(0,0)
output.write(struct.pack('> I',seqcount))
@ -530,7 +484,7 @@ def ecoRankWriter(file,ranks):
output = open(file,'wb')
output.write(struct.pack('> I',len(ranks)))
rankNames = ranks.keys()
rankNames = list(ranks.keys())
rankNames.sort()
for rank in rankNames:
@ -552,7 +506,7 @@ def ecoNameWriter(file,names):
output = open(file,'wb')
output.write(struct.pack('> I',len(names)))
names.sort(nameCmp)
names.sort(key=lambda x:x[0].upper())
for name in names:
output.write(ecoNamePacker(name))
@ -573,8 +527,8 @@ def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
taxonomy[3],
parser)
if sk:
print >>sys.stderr,"Skipped entry :"
print >>sys.stderr,sk
print("Skipped entry :", file=sys.stderr)
print(sk, file=sys.stderr)
def ecoParseOptions(arguments):
opt = {
@ -618,34 +572,30 @@ def ecoParseOptions(arguments):
opt['parser']=sequenceIteratorFactory(emblEntryParser,
entryIterator)
else:
raise ValueError,'Unknown option %s' % name
raise ValueError('Unknown option %s' % name)
return opt,filenames
def printHelp():
print "-----------------------------------"
print " ecoPCRFormat.py"
print "-----------------------------------"
print "ecoPCRFormat.py [option] <argument>"
print "-----------------------------------"
print "-e --embl :[E]mbl format"
print "-f --fasta :[F]asta format"
print "-g --genbank :[G]enbank format"
print "-h --help :[H]elp - print this help"
print "-n --name :[N]ame of the new database created"
print "-t --taxonomy :[T]axonomy - path to the taxonomy database"
print " :bcp-like dump from GenBank taxonomy database."
print "-----------------------------------"
print("-----------------------------------")
print(" ecoPCRFormat.py")
print("-----------------------------------")
print("ecoPCRFormat.py [option] <argument>")
print("-----------------------------------")
print("-e --embl :[E]mbl format")
print("-f --fasta :[F]asta format")
print("-g --genbank :[G]enbank format")
print("-h --help :[H]elp - print this help")
print("-n --name :[N]ame of the new database created")
print("-t --taxonomy :[T]axonomy - path to the taxonomy database")
print(" :bcp-like dump from GenBank taxonomy database.")
print("-----------------------------------")
if __name__ == '__main__':
opt,filenames = ecoParseOptions(sys.argv[1:])
if opt['taxmod']=='dump':
taxonomy = readTaxonomyDump(opt['taxdir'])
elif opt['taxmod']=='db':
taxonomy = readTaxonomyDB(opt['taxdb'])
ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])