Merge of eric-test branche to the trunk

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@200 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-04-20 08:38:41 +00:00
parent b8af5dd65f
commit e3d922e103
17 changed files with 1308 additions and 264 deletions
--- a/src/ecoPrimer
+++ b/src/ecoPrimer
--- a/src/ecoprimer.c
+++ b/src/ecoprimer.c
@ -25,9 +25,59 @@

 static void PrintHelp()
 {
-        PP      "------------------------------------------\n");
-        PP      " ecoPrimer Version %s\n", VERSION);
-        PP      "------------------------------------------\n");
+			  PP      "------------------------------------------\n");
+			  PP      " ecoPrimer Version %s\n", VERSION);
+			  PP      "------------------------------------------\n");
+              PP      "synopsis : finding primers and measureing the quality of primers and barcode region\n");
+              PP      "usage: ./ecoPrimer [options] \n");
+              PP      "------------------------------------------\n");
+              PP      "options:\n");
+              PP      "-d    : [D]atabase : to match the expected format, the database\n");
+              PP      "        has to be formated first by the ecoPCRFormat.py program located.\n");
+              PP      "        in the ecoPCR/tools directory.\n");
+              PP      "        ecoPCRFormat.py creates three file types :\n");
+              PP      "            .sdx : contains the sequences\n");
+              PP      "            .tdx : contains information concerning the taxonomy\n");
+              PP      "            .rdx : contains the taxonomy rank\n\n");
+              PP      "        ecoPrimer needs all the file type. As a result, you have to write the\n");
+              PP      "        database radical without any extension. For example /ecoPrimerDB/fstvert\n\n");        
+              PP      "-e    : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n");
+              PP      "-h    : [H]elp - print <this> help\n\n");
+              PP      "-i    : [I]gnore the given taxonomy id.\n\n");
+              PP      "-l    : minimum [L]ength : define the minimum amplication length. \n\n");
+              PP      "-L    : maximum [L]ength : define the maximum amplicationlength. \n\n");
+              PP      "-r    : [R]estricts the search to the given taxonomic id.\n\n");
+              PP      "-c    : Consider that the database sequences are [c]ircular\n\n");
+              PP      "-3 	 : Three prime strict match\n\n");
+              PP      "-q    : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n");
+              PP      "-s    : [S]ensitivity quorum\n\n");
+              PP      "-t    : required [t]axon level for results, by default the results are computed at species level\n\n");
+              PP      "-x    : false positive quorum\n\n");
+              PP      "-D    : set in [d]ouble strand mode\n\n");
+              PP      "-S    : Set in [s]ingle strand mode\n\n");
+              PP      "-U    : No multi match\n\n");
+              PP      "\n");
+              PP      "------------------------------------------\n");
+              PP      "Table result description : \n");
+              PP      "column 1 : serial number\n");
+              PP      "column 2 : primer1\n");
+              PP      "column 3 : primer2\n");
+              PP      "column 4 : good/bad\n");
+              PP      "column 5 : in sequence count\n");
+              PP      "column 6 : out sequence count\n");
+              PP      "column 7 : yule\n");
+              PP      "column 8 : in taxa count\n");
+              PP      "column 9 : out taxa count\n");
+              PP      "column 10 : coverage\n");
+              PP      "column 11 : specificity\n");
+              PP      "column 12 : minimum amplified length\n");
+              PP      "column 13 : maximum amplified length\n");
+              PP      "column 14 : average amplified length\n");
+              PP      "------------------------------------------\n");
+              PP		" http://www.grenoble.prabi.fr/trac/ecoPrimer/\n");
+              PP      "------------------------------------------\n\n");        
+              PP      "\n");
+
 }

 static void ExitUsage(int stat)
@ -56,7 +106,7 @@ void initoptions(poptions_t options)
 	options->strict_exclude_quorum=0.1;
 	options->sensitivity_quorum=0.9;
 	options->false_positive_quorum=0.1;
-	options->strict_three_prime=2;
+	options->strict_three_prime=0;
 	options->r=0;
 	options->g=0;
 	options->no_multi_match=FALSE;
@ -75,7 +125,7 @@ void printcurrenttime ()
    /* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */
    ts = localtime(&now);
    strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts);
-    fprintf(stderr,"#%d#, %s\n",now, buf);
+    fprintf(stderr,"#%d#, %s\n",(int)now, buf);
 }

 void printcurrenttimeinmilli()
@ -90,7 +140,125 @@ void printcurrenttimeinmilli()
 }

 /*TR: Added*/
+
+void printapair(int32_t index,ppair_t pair, poptions_t options)
+{
+	uint32_t wellidentifiedtaxa;
+	
+	printf("%6d\t",index);
+	if (pair->asdirect1)
+		printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length));
+	else
+		printf("%s\t",ecoUnhashWord(ecoComplementWord(pair->p1->word,
+				                                      options->primer_length),options->primer_length));
+
+	if (pair->asdirect2)
+		printf("%s",ecoUnhashWord(pair->p2->word,options->primer_length));
+	else
+		printf("%s",ecoUnhashWord(ecoComplementWord(pair->p2->word,
+				                                    options->primer_length),options->primer_length));
+
+	printf("\t%c%c", "bG"[(int)pair->p1->good],"bG"[(int)pair->p2->good]);
+
+
+	printf("\t%d", pair->inexample);
+	printf("\t%d", pair->outexample);
+	printf("\t%4.3f", pair->yule);
+
+	printf("\t%d", pair->intaxa);
+	printf("\t%d", pair->outtaxa);
+	printf("\t%4.3f", (float)pair->intaxa/options->intaxa);
+	
+	wellidentifiedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa;
+	
+	//printf("\t%d", pair->notwellidentifiedtaxa);
+	
+	//printf("\t%d", (pair->intaxa + pair->outtaxa));
+
+
+	
+	printf("\t%4.3f", (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa));
+
+
+	printf("\t%d", pair->mind);
+	printf("\t%d", pair->maxd);
+	printf("\t%3.2f\n", (float)pair->sumd/pair->inexample);
+
+}
+
+uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
+{
+	uint32_t i,j;
+	float q,qfp;
+
+	for (i=0,j=0;i < count;i++)
+	{
+		if (options->insamples)
+			q = (float)sortedpairs[i]->inexample/options->insamples;
+		else q=1.0;
+
+		if (options->outsamples)
+			qfp = (float)sortedpairs[i]->outexample/options->outsamples;
+		else qfp=0.0;
+
+		sortedpairs[i]->quorumin = q;
+		sortedpairs[i]->quorumout = qfp;
+		sortedpairs[i]->yule = q -qfp;
+
+
+		sortedpairs[j]=sortedpairs[i];
+
+
+
+		if (q > options->sensitivity_quorum &&
+			qfp < options->false_positive_quorum)
+		{
+			(void)taxonomycoverage(sortedpairs[j],options);
+			taxonomyspecificity(sortedpairs[j]);
+			j++;
+		}
+	}
+
+	return j;
+}
+
+void printpairs (ppairtree_t pairs, poptions_t options)
+{
+   ppair_t* sortedpairs;
+   ppair_t* index;
+   ppairlist_t pl;
+   size_t i,j;
+   int32_t count;
+
+   //printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n"); 
+   
+   fprintf(stderr,"Total pair count : %d\n",pairs->count);
+
+   sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs");
+   index=sortedpairs;
+   pl=pairs->first;
+   j=0;
+   while(pl->next)
+   {
+	   for (i=0;i<pl->paircount;i++,j++)
+		   sortedpairs[j]=pl->pairs+i;
+	   pl=pl->next;
+   }
+
+   for (i=0;i<pl->paircount;i++,j++)
+	   sortedpairs[j]=pl->pairs+i;
+
+   count=filterandsortpairs(sortedpairs,pairs->count,options);
+
+   for (i=0;i < count;i++)
+	   printapair(i,sortedpairs[i],options);
+
+}
+
+
+#ifdef MASKEDCODE
 void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
+
 {
 	uint32_t i;
 	uint32_t  wordsize = options->primer_length;
@ -98,9 +266,9 @@ void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, ui
 	double sens;
 	double speci;
 	float  avg;
-	
+
 	quorumseqs = seqdbsize * 70 / 100;
-	
+
 	printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);

 	for (i=0; i < pairs.paircount; i++)
@ -121,9 +289,12 @@ void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, ui
 	}
 }

+#endif /* MASKEDCODE */
+
 /*updateseqparams: This function counts the insample and outsample sequences
 *  and with each sequences adds a tag of the taxon to which the sequence beongs*/
-void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, 
+
+void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
 		poptions_t options, int32_t *insamples, int32_t *outsamples)
 {
 	uint32_t i;
@ -131,7 +302,7 @@ void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxo
 	ecotx_t  *tmptaxon;

    for (i=0;i<seqdbsize;i++)
-    {    	
+    {
    	seqdb[i]->isexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options);
    	if (seqdb[i]->isexample)
    		(*insamples)++;
@ -139,7 +310,7 @@ void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxo
    		(*outsamples)++;

    	taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid;
-		tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);		
+		tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);
 		if (tmptaxon)
 			tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx);
 		if (tmptaxon)
@ -154,7 +325,7 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
    /*set taxon rank for which result is to be given*/
    for (i = 0; i < taxonomy->ranks->count; i++)
    {
-    	if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0) 
+    	if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0)
    	{
    		options->taxonrankidx = i;
    		break;
@ -168,47 +339,10 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
    }
 }
 /* to get db stats, totals of species, genus etc....*/
-int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, 
-		poptions_t options)
-{
-	uint32_t i;
-	uint32_t j;
-	uint32_t nameslots = 500;
-	uint32_t namesindex = 0;
-	int32_t *ranktaxonids = ECOMALLOC(nameslots * sizeof(int32_t), "Error in taxon rank allocation");
-	int32_t taxid;
-	
-	ecotx_t  *tmptaxon;

-    for (i=0;i<seqdbsize;i++)
-	{
-    	taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid;
-		tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);
-		if (tmptaxon)
-			tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx);
-		if (tmptaxon)
-		{
-			for (j = 0; j < namesindex; j++)
-			{
-				if (tmptaxon->taxid == ranktaxonids[j]) break;
-			}
-			if (j < namesindex) continue; /* name is already in list, so no need to add it*/
-			
-			if (namesindex == nameslots)
-			{
-				nameslots += 500;
-				ranktaxonids = ECOREALLOC(ranktaxonids, nameslots * sizeof(int32_t), "Cannot allocate pair rank taxon table");
-			}
-			ranktaxonids[namesindex] = tmptaxon->taxid;
-			namesindex++;
-		}
-	}
-    ECOFREE(ranktaxonids, "free rank taxon table");
-    
-    return namesindex;
-}
+#ifdef MASKEDCODE

-void setoktaxforspecificity (ppairscount_t pairs)
+void setoktaxforspecificity (ppairtree_t pairs)
 {
 	uint32_t i;
 	uint32_t j;
@ -216,7 +350,7 @@ void setoktaxforspecificity (ppairscount_t pairs)
 	uint32_t l;
 	int taxcount;
 	int32_t taxid;
-	
+
 	for (i = 0; i < pairs->paircount; i++)
 	{
 		for (j = 0; j < pairs->pairs[i].taxsetindex; j++)
@ -235,13 +369,13 @@ void setoktaxforspecificity (ppairscount_t pairs)
 							taxcount += pairs->pairs[i].ampset[l].seqidindex;
 							break;
 						}
-						
+
 						if (taxid != pairs->pairs[i].ampset[l].taxonids[0])
 						{
 							if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];
-							taxcount++;	
+							taxcount++;
 						}
-						
+
 						if (taxcount > 1) break;
 					}
 				}
@ -251,6 +385,8 @@ void setoktaxforspecificity (ppairscount_t pairs)
 	}
 }

+#endif
+
 int main(int argc, char **argv)
 {
 	pecodnadb_t   seqdb; /* of type ecoseq_t */
@ -264,11 +400,11 @@ int main(int argc, char **argv)
 	int32_t       insamples=0;
 	int32_t       outsamples=0;
 	uint32_t        i;
-	
-	pwordcount_t  words;
-	pprimercount_t     primers;
-	pairscount_t		pairs;
-	
+
+	pwordcount_t    words;
+	pprimercount_t  primers;
+	ppairtree_t		pairs;
+
 	int32_t		  rankdbstats = 0;

 	//printcurrenttime();
@ -290,9 +426,9 @@ int main(int argc, char **argv)
                                /* -------------------- */
        case 'h':               /* help                 */
                                /* -------------------- */
-           PrintHelp();
-           exit(0);
-           break;
+        	PrintHelp();
+        	 exit(0);
+        	 break;

                                /* ------------------------- */
        case 'l':               /* min amplification lenght  */
@ -337,7 +473,7 @@ int main(int argc, char **argv)
        	strncpy(options.taxonrank, optarg, 19);
        	options.taxonrank[19] = 0;
            break;
-        	
+
        						/* -------------------- */
        case 'x':               /* strict matching quorum           */
 								/* -------------------- */
@ -396,22 +532,27 @@ int main(int argc, char **argv)
    fprintf(stderr,"Reading taxonomy database ...");
    taxonomy = read_taxonomy(options.prefix,0);
    fprintf(stderr,"Ok\n");
-    
+
    setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/
-    
+
    fprintf(stderr,"Reading sequence database ...\n");

    seqdb = readdnadb(options.prefix,&seqdbsize);

    fprintf(stderr,"Ok\n");
    fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);
-    
+
    updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples);
+    options.dbsize=seqdbsize;
+    options.insamples=insamples;
+    options.outsamples=outsamples;

    rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options);

-    fprintf(stderr,"Database is constituted of %5d examples\n",insamples);
-    fprintf(stderr,"                       and %5d counterexamples\n",outsamples);
+    fprintf(stderr,"Database is constituted of %5d examples        corresponding to %5d %s\n",insamples,
+    		options.intaxa,options.taxonrank);
+    fprintf(stderr,"                       and %5d counterexamples corresponding to %5d %s\n",outsamples,
+    		options.outtaxa,options.taxonrank);
    fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats);

    fprintf(stderr,"\nIndexing words in sequences\n");
@ -419,7 +560,7 @@ int main(int argc, char **argv)
    printcurrenttimeinmilli();
    words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
    printcurrenttimeinmilli();
-    
+
    fprintf(stderr,"\n  Strict primer count : %d\n",words->size);

    if (options.no_multi_match)
@ -460,13 +601,15 @@ int main(int argc, char **argv)

    /*TR: Added*/
    pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
-    setoktaxforspecificity (&pairs);
    
-    printpairs (pairs, &options, rankdbstats, seqdbsize);
-    
-    
-    
-    ECOFREE(pairs.pairs,"Free pairs table");
+
+  //  setoktaxforspecificity (&pairs);
+
+     printpairs (pairs, &options);
+
+
+
+    //ECOFREE(pairs.pairs,"Free pairs table");

    return 0;
 }
--- a/src/libecoPCR/ecoError.P
+++ b/src/libecoPCR/ecoError.P
@ -0,0 +1,15 @@
+ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \
+  /usr/include/_types.h /usr/include/sys/_types.h \
+  /usr/include/sys/cdefs.h /usr/include/machine/_types.h \
+  /usr/include/i386/_types.h /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
+  /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
+  /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
+  /usr/include/machine/signal.h /usr/include/i386/signal.h \
+  /usr/include/i386/_structs.h /usr/include/sys/_structs.h \
+  /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
+  /usr/include/sys/resource.h /usr/include/machine/endian.h \
+  /usr/include/i386/endian.h /usr/include/sys/_endian.h \
+  /usr/include/libkern/_OSByteOrder.h \
+  /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
+  /usr/include/machine/types.h /usr/include/i386/types.h
--- a/src/libecoPCR/ecoIOUtils.P
+++ b/src/libecoPCR/ecoIOUtils.P
@ -0,0 +1,15 @@
+ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \
+  /usr/include/_types.h /usr/include/sys/_types.h \
+  /usr/include/sys/cdefs.h /usr/include/machine/_types.h \
+  /usr/include/i386/_types.h /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
+  /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
+  /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
+  /usr/include/machine/signal.h /usr/include/i386/signal.h \
+  /usr/include/i386/_structs.h /usr/include/sys/_structs.h \
+  /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
+  /usr/include/sys/resource.h /usr/include/machine/endian.h \
+  /usr/include/i386/endian.h /usr/include/sys/_endian.h \
+  /usr/include/libkern/_OSByteOrder.h \
+  /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
+  /usr/include/machine/types.h /usr/include/i386/types.h
--- a/src/libecoPCR/ecoMalloc.P
+++ b/src/libecoPCR/ecoMalloc.P
@ -0,0 +1,15 @@
+ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \
+  /usr/include/_types.h /usr/include/sys/_types.h \
+  /usr/include/sys/cdefs.h /usr/include/machine/_types.h \
+  /usr/include/i386/_types.h /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
+  /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
+  /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
+  /usr/include/machine/signal.h /usr/include/i386/signal.h \
+  /usr/include/i386/_structs.h /usr/include/sys/_structs.h \
+  /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
+  /usr/include/sys/resource.h /usr/include/machine/endian.h \
+  /usr/include/i386/endian.h /usr/include/sys/_endian.h \
+  /usr/include/libkern/_OSByteOrder.h \
+  /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
+  /usr/include/machine/types.h /usr/include/i386/types.h
--- a/src/libecoPCR/ecodna.P
+++ b/src/libecoPCR/ecodna.P
@ -0,0 +1,5 @@
+ecodna.o ecodna.P : ecodna.c /usr/include/string.h /usr/include/_types.h \
+  /usr/include/sys/_types.h /usr/include/sys/cdefs.h \
+  /usr/include/machine/_types.h /usr/include/i386/_types.h ecoPCR.h \
+  /usr/include/stdio.h /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h
--- a/src/libecoPCR/ecofilter.P
+++ b/src/libecoPCR/ecofilter.P
@ -0,0 +1,5 @@
+ecofilter.o ecofilter.P : ecofilter.c ecoPCR.h /usr/include/stdio.h \
+  /usr/include/_types.h /usr/include/sys/_types.h \
+  /usr/include/sys/cdefs.h /usr/include/machine/_types.h \
+  /usr/include/i386/_types.h /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h
--- a/src/libecoPCR/econame.P
+++ b/src/libecoPCR/econame.P
@ -0,0 +1,15 @@
+econame.o econame.P : econame.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
+  /usr/include/sys/_types.h /usr/include/sys/cdefs.h \
+  /usr/include/machine/_types.h /usr/include/i386/_types.h \
+  /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
+  /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
+  /usr/include/sys/wait.h /usr/include/sys/signal.h \
+  /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
+  /usr/include/i386/signal.h /usr/include/i386/_structs.h \
+  /usr/include/sys/_structs.h /usr/include/machine/_structs.h \
+  /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
+  /usr/include/machine/endian.h /usr/include/i386/endian.h \
+  /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
+  /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
+  /usr/include/machine/types.h /usr/include/i386/types.h
--- a/src/libecoPCR/ecorank.P
+++ b/src/libecoPCR/ecorank.P
@ -0,0 +1,15 @@
+ecorank.o ecorank.P : ecorank.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
+  /usr/include/sys/_types.h /usr/include/sys/cdefs.h \
+  /usr/include/machine/_types.h /usr/include/i386/_types.h \
+  /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
+  /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
+  /usr/include/sys/wait.h /usr/include/sys/signal.h \
+  /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
+  /usr/include/i386/signal.h /usr/include/i386/_structs.h \
+  /usr/include/sys/_structs.h /usr/include/machine/_structs.h \
+  /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
+  /usr/include/machine/endian.h /usr/include/i386/endian.h \
+  /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
+  /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
+  /usr/include/machine/types.h /usr/include/i386/types.h
--- a/src/libecoPCR/ecoseq.P
+++ b/src/libecoPCR/ecoseq.P
@ -0,0 +1,19 @@
+ecoseq.o ecoseq.P : ecoseq.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
+  /usr/include/sys/_types.h /usr/include/sys/cdefs.h \
+  /usr/include/machine/_types.h /usr/include/i386/_types.h \
+  /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
+  /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
+  /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
+  /usr/include/machine/signal.h /usr/include/i386/signal.h \
+  /usr/include/i386/_structs.h /usr/include/sys/_structs.h \
+  /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
+  /usr/include/sys/resource.h /usr/include/machine/endian.h \
+  /usr/include/i386/endian.h /usr/include/sys/_endian.h \
+  /usr/include/libkern/_OSByteOrder.h \
+  /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
+  /usr/include/machine/types.h /usr/include/i386/types.h \
+  /usr/include/string.h /usr/include/zlib.h /usr/include/zconf.h \
+  /usr/include/sys/types.h /usr/include/unistd.h \
+  /usr/include/sys/unistd.h /usr/include/sys/select.h \
+  /usr/include/sys/_select.h
--- a/src/libecoPCR/ecotax.P
+++ b/src/libecoPCR/ecotax.P
@ -0,0 +1,15 @@
+ecotax.o ecotax.P : ecotax.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
+  /usr/include/sys/_types.h /usr/include/sys/cdefs.h \
+  /usr/include/machine/_types.h /usr/include/i386/_types.h \
+  /usr/include/inttypes.h \
+  /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
+  /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
+  /usr/include/sys/wait.h /usr/include/sys/signal.h \
+  /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
+  /usr/include/i386/signal.h /usr/include/i386/_structs.h \
+  /usr/include/sys/_structs.h /usr/include/machine/_structs.h \
+  /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
+  /usr/include/machine/endian.h /usr/include/i386/endian.h \
+  /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
+  /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
+  /usr/include/machine/types.h /usr/include/i386/types.h
--- a/src/libecoprimer/Makefile
+++ b/src/libecoprimer/Makefile
@ -10,7 +10,9 @@ SOURCES = goodtaxon.c \
          queue.c \
          libstki.c \
          sortmatch.c \
+          pairtree.c \
          pairs.c \
+          taxstats.c \
          apat_search.c

 SRCS=$(SOURCES)
--- a/src/libecoprimer/aproxpattern.c
+++ b/src/libecoprimer/aproxpattern.c
@ -61,7 +61,7 @@ void encodeSequence(ecoseq_t *seq)

     for (i=0;i<seq->SQ_length;i++,data++,cseq++)
     {
-         *data = encoder[(IS_UPPER(*cseq) ? *cseq - 'A' : 'Z')];
+         *data = encoder[(IS_UPPER(*cseq) ? *cseq : 'Z') - 'A'];
     }
 }

@ -82,7 +82,7 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3
 	uint32_t  inSequenceQuorum;
 	uint32_t  outSequenceQuorum;
 	bool_t    conserved = TRUE;
-	
+
 	//poslist_t ttt;


--- a/src/libecoprimer/ecoprimer.h
+++ b/src/libecoprimer/ecoprimer.h
@ -79,28 +79,39 @@ typedef union {
 	uint32_t value;
 } poslist_t, *ppostlist_t;

-typedef struct {
-    word_t     word;
-	uint32_t   *directCount;
-	ppostlist_t directPos;
+/**
+ * primer_t structure store fuzzy match positions for a primer
+ * on all sequences
+ */

-	uint32_t   *reverseCount;
-	ppostlist_t reversePos;
-	bool_t     good;
-	uint32_t   inexample;
-	uint32_t   outexample;
+typedef struct {
+    word_t     word;              //< code for the primer
+	uint32_t   *directCount;      //< Occurrence count on direct strand
+	ppostlist_t directPos;        //< list of position list on direct strand
+
+	uint32_t   *reverseCount;     //< Occurrence count on reverse strand
+	ppostlist_t reversePos;       //< list of position list on reverse strand
+
+	bool_t     good;              //< primer match more than quorum example and no
+	                              //  more counterexample quorum.
+
+	uint32_t   inexample;         //< count of example sequences matching primer
+	uint32_t   outexample;        //< count of counterexample sequences matching primer
 } primer_t, *pprimer_t;

+/**
+ * primercount_t structure store fuzzy match positions for all primers
+ * on all sequences as a list of primer_t
+ */
 typedef struct {
-	pprimer_t   primers;
+	pprimer_t   primers; 
 	uint32_t    size;
 } primercount_t, *pprimercount_t;

 typedef struct {
-	word_t      word;
+	pprimer_t   primer;
 	uint32_t    position;
 	bool_t      strand;
-	bool_t     	good; /*TR: Added*/
 } primermatch_t, *pprimermatch_t;

 /*TR: Added*/
@ -109,6 +120,19 @@ typedef struct {
 	uint32_t	matchcount;
 } primermatchcount_t, *pprimermatchcount_t;

+typedef struct {
+	pecoseq_t  sequence;
+	bool_t     strand;
+	const char *amplifia;
+	int32_t	   length;
+} amplifia_t, *pamplifia_t;
+
+typedef struct {
+	pamplifia_t amplifias;
+	uint32_t    ampcount;
+	uint32_t	ampslot;
+} amplifiacount_t, *pamplifiacount_t;
+
 typedef struct {
 	char *amplifia;
 	int32_t *taxonids;
@ -124,30 +148,52 @@ typedef struct {
 } taxampset_t, *ptaxampset_t;

 typedef struct {
-	word_t 			w1;
-	word_t 			w2;
-	uint32_t   		inexample; /*inexample count*/
-	uint32_t   		outexample; /*outexample count*/
-	
-	uint32_t 		mind;
-	uint32_t		maxd;
-	
-	uint32_t		ampsetcount;
-	uint32_t		ampsetindex;
-	pampseqset_t	ampset;
-	
-	uint32_t		taxsetcount;
-	uint32_t		taxsetindex;
-	ptaxampset_t	taxset;
-	
-	uint32_t		oktaxoncount;
-} pairs_t, *ppairs_t;
+	pprimer_t 		p1;
+	bool_t			asdirect1;
+	pprimer_t 		p2;
+	bool_t			asdirect2;
+
+	amplifiacount_t pcr;
+
+	uint32_t   		inexample;         //< example sequence count
+	uint32_t   		outexample;        //< counterexample sequence count
+	uint32_t   		intaxa;            //< example taxa count
+	uint32_t   		outtaxa;            //< counterexample taxa count
+	uint32_t		notwellidentifiedtaxa;
+
+
+					// these statistics are relative to inexample sequences
+
+	uint32_t 		mind;				//< minimum distance between primers
+	uint32_t		maxd;				//< maximum distance between primers
+	uint32_t        sumd;				//< distance sum
+	float			yule;
+	float			quorumin;
+	float           quorumout;
+//
+//	uint32_t		taxsetcount;
+//	uint32_t		taxsetindex;
+//	ptaxampset_t	taxset;
+//
+//	uint32_t		oktaxoncount;
+
+} pair_t, *ppair_t;

 /*TR: Added*/
+
 typedef struct {
-	ppairs_t 	pairs;
-	uint32_t	paircount;
-}pairscount_t, *ppairscount_t;
+	size_t  	paircount;
+	size_t      pairslots;
+    void*       next;
+	pair_t 	    pairs[1];
+} pairlist_t, *ppairlist_t;
+
+typedef struct {
+	ppairlist_t first;
+	ppairlist_t last;
+	void       *tree;
+	int32_t     count;
+} pairtree_t, *ppairtree_t;

 typedef struct {
 	pword_t     words;
@ -168,6 +214,18 @@ typedef struct {
 	uint32_t    size;
 } merge_t, *pmerge_t;

+typedef struct {
+	const char 		*amplifia;
+	bool_t     	strand;
+	int32_t	   	length;
+	int32_t		taxoncount;
+	void		*taxontree;
+}amptotaxon_t, *pamptotaxon_t;
+
+typedef struct {
+	int32_t 	taxid;
+	void		*amptree;	
+}taxontoamp_t, *ptaxontoamp_t;

 typedef struct {
 	uint32_t        lmin;                   //**< Amplifia minimal length
@ -189,6 +247,14 @@ typedef struct {
 	bool_t        no_multi_match;
 	char		  taxonrank[20];  //TR to count ranks against a pair
 	int32_t		  taxonrankidx;   //TR to count ranks against a pair
+
+	// Some statistics useful for options filters
+
+	int32_t       dbsize;
+	int32_t		  insamples;
+	int32_t		  outsamples;
+	int32_t       intaxa;
+	int32_t       outtaxa;
 } options_t, *poptions_t;

 typedef ecoseq_t  **pecodnadb_t;
@ -232,7 +298,21 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3

 void sortmatch(pprimermatch_t table,uint32_t N);

+ppairtree_t initpairtree(ppairtree_t tree);
+ppair_t pairintree (pair_t key,ppairtree_t pairlist);
+ppair_t insertpair(pair_t key,ppairtree_t list);
+
+
 /*TR: Added*/
-pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options);
+ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options);
+
+int32_t counttaxon(int32_t taxid);
+int32_t getrankdbstats(pecodnadb_t seqdb,
+					   uint32_t seqdbsize,
+					   ecotaxonomy_t *taxonomy,
+					   poptions_t options);
+float taxonomycoverage(ppair_t pair, poptions_t options);
+char ecoComplementChar(char base);
+void taxonomyspecificity (ppair_t pair);

 #endif /* EPSORT_H_ */
--- a/src/libecoprimer/hashsequence.c
+++ b/src/libecoprimer/hashsequence.c
@ -201,3 +201,8 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word)
 		return ~0;
 }

+char ecoComplementChar(char base)
+{
+	return (base < 4)? !base & 3: 4; 
+}
+
--- a/src/libecoprimer/pairs.c
+++ b/src/libecoprimer/pairs.c
@ -7,34 +7,40 @@

 #include  "ecoprimer.h"
 #include  <string.h>
+#include  <stdlib.h>

-primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options);
+static void buildPrimerPairsForOneSeq(uint32_t seqid,
+									  pecodnadb_t seqdb,
+		 							  pprimercount_t primers,
+								 	  ppairtree_t pairs,
+									  poptions_t options);

-int32_t pairinlist (ppairs_t pairlist, word_t w1, word_t w2, uint32_t size)
-{
-	uint32_t i;
-	
-	for (i = 0; i < size; i++)
-	{
-		if (w1 == pairlist[i].w1 && w2 == pairlist[i].w2) return i;
-		if (w1 == pairlist[i].w2 && w2 == pairlist[i].w1) return i;
-	}
-	return -1;
-}

-char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid)
+
+
+
+
+/*************************************
+ *
+ *       pair collection management
+ *
+ *************************************/
+
+#ifdef MASKEDCODE
+
+char *addamplifiasetelem (ppair_t pair, char* amplifia, int32_t taxid)
 {
 	uint32_t i;
 	uint32_t j;
 	char *ampused = NULL;
-	
+
 	if(pair->ampsetcount == 0)
 	{
 		pair->ampsetcount = 500;
 		pair->ampsetindex = 0;
 		pair->ampset = ECOMALLOC(pair->ampsetcount * sizeof(ampseqset_t),"Cannot allocate amplifia set");
 	}
-	
+
 	for (i = 0; i < pair->ampsetindex; i++)
 	{
 		if (strcmp (pair->ampset[i].amplifia, amplifia) == 0)
@ -43,43 +49,43 @@ char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid)
 			break;
 		}
 	}
-	
+
 	if (i == 0)
 	{
 		pair->ampset[i].seqidcount = 100;
 		pair->ampset[i].seqidindex = 0;
 		pair->ampset[i].taxonids = ECOMALLOC(pair->ampset[i].seqidcount * sizeof(uint32_t),"Cannot allocate amplifia sequence table");
 	}
-	
+
 	if (pair->ampsetindex == pair->ampsetcount)
 	{
 		pair->ampsetcount += 500;
 		pair->ampset = ECOREALLOC(pair->ampset, pair->ampsetcount * sizeof(ampseqset_t), "Cannot allocate amplifia set");
 	}
-	
+
 	if (pair->ampset[i].seqidindex == pair->ampset[i].seqidcount)
 	{
 		pair->ampset[i].seqidcount += 100;
 		pair->ampset[i].taxonids = ECOREALLOC(pair->ampset[i].taxonids, pair->ampset[i].seqidcount * sizeof(int32_t), "Cannot allocate amplifia sequence table");
 	}
-	
+
 	if (pair->ampset[i].amplifia == NULL)
 	{
 		pair->ampset[i].amplifia = amplifia;
 		pair->ampsetindex++;
 	}
-	
+
 	for (j = 0; j < pair->ampset[i].seqidindex; j++)
 	{
 		if (pair->ampset[i].taxonids[j] == taxid) break;
 	}
-	
+
 	if (j == pair->ampset[i].seqidindex)
 		pair->ampset[i].taxonids[pair->ampset[i].seqidindex++] = taxid;
 	return ampused;
 }

-void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
+void addtaxampsetelem (ppair_t pair, int32_t taxid, char *amplifia)
 {
 	uint32_t i;
 	uint32_t j;
@ -90,42 +96,42 @@ void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
 		pair->taxsetindex = 0;
 		pair->taxset = ECOMALLOC(pair->taxsetcount * sizeof(taxampset_t),"Cannot allocate taxon set");
 	}
-	
+
 	for (i = 0; i < pair->taxsetindex; i++)
 	{
 		if (pair->taxset[i].taxonid == taxid) break;
 	}
-	
+
 	if (i == 0)
 	{
 		pair->taxset[i].amplifiacount = 100;
 		pair->taxset[i].amplifiaindex = 0;
 		pair->taxset[i].amplifia = ECOMALLOC(pair->taxset[i].amplifiacount * sizeof(char *),"Cannot allocate amplifia table");
 	}
-	
+
 	if (pair->taxsetindex == pair->taxsetcount)
 	{
 		pair->taxsetcount += 500;
 		pair->taxset = ECOREALLOC(pair->taxset, pair->taxsetcount * sizeof(taxampset_t), "Cannot allocate taxon set");
 	}
-	
+
 	if (pair->taxset[i].amplifiaindex == pair->taxset[i].amplifiacount)
 	{
 		pair->taxset[i].amplifiacount += 100;
 		pair->taxset[i].amplifia = ECOREALLOC(pair->taxset[i].amplifia, pair->taxset[i].amplifiacount * sizeof(char *), "Cannot allocate amplifia table");
 	}
-	
+
 	if (pair->taxset[i].taxonid == 0)
 	{
 		pair->taxset[i].taxonid = taxid;
 		pair->taxsetindex++;
 	}
-	
+
 	for (j = 0; j < pair->taxset[i].amplifiaindex; j++)
 	{
 		if (strcmp(pair->taxset[i].amplifia[j], amplifia) == 0) break;
 	}
-	
+
 	if (j == pair->taxset[i].amplifiaindex)
 	{
 		pair->taxset[i].amplifia[j] = amplifia;
@ -135,140 +141,62 @@ void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)

 char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len)
 {
+	fprintf(stderr,"start : %d length : %d\n",start,len);
 	char *amplifia = ECOMALLOC((len + 1) * sizeof(char),"Cannot allocate amplifia");
 	char *seqc = &seq->SQ[start];
-	
+
 	strncpy(amplifia, seqc, len);
 	return amplifia;
 }

+#endif

 /*TR: Added*/
-pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
+ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
 {
 	uint32_t i;
-	uint32_t j;
-	uint32_t k;
-	uint32_t d;
-	uint32_t strt;
-	uint32_t end;
-	uint32_t paircount = 0;
-	uint32_t pairslots = 500;
-	int32_t foundindex;
-	ppairs_t pairs;
-	pairscount_t primerpairs;
-	primermatchcount_t seqmatchcount;
-	word_t w1;
-	word_t w2;
-	char *amplifia;
-	char *oldamp;
-	
+	ppairtree_t primerpairs;

-	pairs = ECOMALLOC(pairslots * sizeof(pairs_t),"Cannot allocate pairs table");
+	primerpairs = initpairtree(NULL);

 	for (i=0; i < seqdbsize; i++)
 	{
-		seqmatchcount = buildPrimerPairsForOneSeq(i, primers, options);
-		if (seqmatchcount.matchcount == 0) continue;
-		
-		for (j=0; j < seqmatchcount.matchcount; j++)
-		{		
-			strt = 0;
-			w1 = seqmatchcount.matches[j].word;
-			/*first word should b on direct strand*/
-			if (!seqmatchcount.matches[j].strand)
-				w1 = ecoComplementWord(w1, options->primer_length);
-			else
-				strt = options->primer_length;
-			
-			for (k=j+1; k < seqmatchcount.matchcount; k++)
-			{
-				end = 0;
-				w2 = seqmatchcount.matches[k].word;
-				/*second word should be on reverse strand*/
-				if (seqmatchcount.matches[k].strand)
-					w2 = ecoComplementWord(w2, options->primer_length);
-				else
-					end = options->primer_length;
-
-				if (!(seqmatchcount.matches[j].good || seqmatchcount.matches[k].good)) continue;
-				if (w1 == w2) continue;
-				
-				d = seqmatchcount.matches[k].position - seqmatchcount.matches[j].position;
-				if (d >= options->lmin && d <= options->lmax)
-				{
-					/*get amplified string*/
-					amplifia = getamplifia (seqdb[i], seqmatchcount.matches[j].position + strt, d - strt - end);
-
-					foundindex = pairinlist(pairs, w1, w2, paircount);
-					if (foundindex != -1) /*pair is found*/
-					{
-						if (seqdb[i]->isexample)
-							pairs[foundindex].inexample++;
-						else
-							pairs[foundindex].outexample++;
-								
-						if (pairs[foundindex].mind > d) pairs[foundindex].mind = d;
-						else if (pairs[foundindex].maxd < d) pairs[foundindex].maxd = d;
-						
-						oldamp = addamplifiasetelem (&pairs[foundindex], amplifia, seqdb[i]->ranktaxonid);
-						/*if exact same string is already in amplifia set then use that for taxon set, it will help for
-						 * calculating the fully identified taxons i.e specificity, we will compare pointrs instead of strings
-						 * because same string means same pointer*/
-						if (oldamp)
-						{
-							ECOFREE (amplifia, "free amplifia");
-							amplifia = oldamp;
-						}
-						addtaxampsetelem (&pairs[foundindex], seqdb[i]->ranktaxonid, amplifia);
-
-						continue;
-					}
-
-					if (paircount == pairslots)
-					{
-						pairslots += 500;
-						pairs = ECOREALLOC(pairs, pairslots * sizeof(pairs_t), "Cannot allocate pairs table");
-					}
-					pairs[paircount].w1 = w1;
-					pairs[paircount].w2 = w2;
-					if (seqdb[i]->isexample) pairs[paircount].inexample = 1;
-					else pairs[paircount].outexample = 1;
-					pairs[paircount].mind = d;
-					pairs[paircount].maxd = d;
-					oldamp = addamplifiasetelem (&pairs[paircount], amplifia, seqdb[i]->ranktaxonid);
-					addtaxampsetelem (&pairs[paircount], seqdb[i]->ranktaxonid, amplifia);
-
-					paircount++;
-				}
-				else if (d > options->lmax)
-					break; /*once if the distance is greater than lmax then it will keep on increasing*/
-			}
-		}
-		ECOFREE(seqmatchcount.matches, "Cannot free matches table");
+		buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options);
 	}
-	primerpairs.pairs = ECOREALLOC(pairs, paircount * sizeof(pairs_t), "Cannot allocate pairs table");
-	primerpairs.paircount = paircount;
+
 	return primerpairs;
+
 }

-primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options)
-{
-	uint32_t i,j,k;
-	uint32_t matchcount=0;
-	pprimermatch_t matches = NULL;
-	primermatchcount_t seqmatchcount;
+#define DMAX (2000000000)
+
+static void buildPrimerPairsForOneSeq(uint32_t seqid,
+									  pecodnadb_t seqdb,
+		 							  pprimercount_t primers,
+								 	  ppairtree_t pairs,
+									  poptions_t options)
+{
+	static uint32_t    paircount=0;
+	uint32_t           i,j,k;
+	uint32_t           matchcount=0;
+	pprimermatch_t     matches = NULL;
+	primermatchcount_t seqmatchcount;
+	ppair_t            pcurrent;
+	pair_t			   current;
+	pprimer_t		   wswp;
+	bool_t			   bswp;
+	size_t			   distance;
+	bool_t             strand;

-	seqmatchcount.matchcount = 0;
-	seqmatchcount.matches = NULL;
-	
 	for (i=0;i < primers->size; i++)
 	{
 		matchcount+=primers->primers[i].directCount[seqid];
 		matchcount+=primers->primers[i].reverseCount[seqid];
 	}
-	
-	if (matchcount <= 0) return seqmatchcount;
+
+	if (matchcount <= 0)
+		return;
+
 	matches = ECOMALLOC(matchcount * sizeof(primermatch_t),"Cannot allocate primers match table");

 	for (i=0,j=0;i < primers->size; i++)
@ -277,17 +205,15 @@ primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t prime
 		{
 			if (primers->primers[i].directCount[seqid]==1)
 			{
-				matches[j].word = primers->primers[i].word;
+				matches[j].primer = primers->primers+i;
 				matches[j].strand=TRUE;
-				matches[j].good=primers->primers[i].good;/*TR: Added*/
 				matches[j].position=primers->primers[i].directPos[seqid].value;
 				j++;
 			}
 			else for (k=0; k < primers->primers[i].directCount[seqid]; k++,j++)
 			{
-				matches[j].word = primers->primers[i].word;
+				matches[j].primer = primers->primers+i;
 				matches[j].strand=TRUE;
-				matches[j].good=primers->primers[i].good;/*TR: Added*/
 				matches[j].position=primers->primers[i].directPos[seqid].pointer[k];
 			}
 		}
@ -296,26 +222,144 @@ primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t prime
 		{
 			if (primers->primers[i].reverseCount[seqid]==1)
 			{
-				matches[j].word = primers->primers[i].word;
+				matches[j].primer = primers->primers+i;
 				matches[j].strand=FALSE;
-				matches[j].good=primers->primers[i].good;/*TR: Added*/
 				matches[j].position=primers->primers[i].reversePos[seqid].value;
 				j++;
 			}
 			else for (k=0; k < primers->primers[i].reverseCount[seqid]; k++,j++)
 			{
-				matches[j].word = primers->primers[i].word;
+				matches[j].primer = primers->primers+i;
 				matches[j].strand=FALSE;
-				matches[j].good=primers->primers[i].good;/*TR: Added*/
 				matches[j].position=primers->primers[i].reversePos[seqid].pointer[k];
 			}
 		}
 	}

-	sortmatch(matches,matchcount); // sort in asscending order by position
-	
-	/*TR: Added*/
-	seqmatchcount.matches = matches;
-	seqmatchcount.matchcount = matchcount;
-	return seqmatchcount;
+	if (matchcount>1)
+	{
+//		fprintf(stderr,"\n====================================\n");
+
+		sortmatch(matches,matchcount); // sort in ascending order by position
+
+		for (i=0; i < matchcount;i++)
+		{
+			// For all primers matching the sequence
+
+			for(j=i+1;
+			       (j<matchcount)
+			    && ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
+			   j++
+			   )
+
+			   // For all not too far primers
+
+		       if ( (matches[i].primer->good || matches[j].primer->good)
+		    		&& (distance > options->lmin)
+		    		)
+		       {
+
+		    	   // If possible primer pair
+
+		    	   current.p1 = matches[i].primer;
+		    	   current.asdirect1=matches[i].strand;
+		    	   current.p2 = matches[j].primer;
+		    	   current.asdirect2= !matches[j].strand;
+		    	   current.maxd=DMAX;
+		    	   current.mind=DMAX;
+		    	   current.sumd=0;
+	    		   current.inexample=0;
+	    		   current.outexample=0;
+
+
+		    	   // Standardize the pair
+
+	    		   strand = current.p2->word > current.p1->word;
+		    	   if (!strand)
+		    	   {
+		    		   wswp = current.p1;
+		    		   current.p1=current.p2;
+		    		   current.p2=wswp;
+
+		    		   bswp = current.asdirect1;
+		    		   current.asdirect1=current.asdirect2;
+		    		   current.asdirect2=bswp;
+		    	   }
+		    	   
+
+		    	   // Look for the new pair in already seen pairs
+
+		    	   pcurrent = insertpair(current,pairs);
+
+
+		    	   if (seqdb[seqid]->isexample)
+
+				   {
+		    		   pcurrent->inexample++;
+		    		   pcurrent->sumd+=distance;
+
+		    		   if ((pcurrent->maxd==DMAX) || (distance > pcurrent->maxd))
+			    		   pcurrent->maxd = distance;
+
+			    	   if (distance < pcurrent->mind)
+			    		   pcurrent->mind = distance;
+		    	   }
+		    	   else
+		    		   pcurrent->outexample++;
+
+		    	   if ((pcurrent->outexample+pcurrent->inexample)==1)
+		    	   {
+					   paircount++;
+					   pcurrent->pcr.ampslot=200;
+					   pcurrent->pcr.ampcount=0;
+					   pcurrent->pcr.amplifias = ECOMALLOC(sizeof(amplifia_t)*pcurrent->pcr.ampslot,
+							                               "Cannot allocate amplifia table");
+		    	   }
+		    	   else
+		    	   {
+		    		   if (pcurrent->pcr.ampslot==pcurrent->pcr.ampcount)
+		    		   {
+		    			   pcurrent->pcr.ampslot+=200;
+		    			   pcurrent->pcr.amplifias = ECOREALLOC(pcurrent->pcr.amplifias,
+															    sizeof(amplifia_t)*pcurrent->pcr.ampslot,
+		    			   							            "Cannot allocate amplifia table");
+		    		   }
+		    	   }
+
+		    	   pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].length=distance;
+		    	   pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].sequence=seqdb[seqid];
+		    	   pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].strand=strand;
+
+		    	   if (strand)
+		    	   	   pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia=  seqdb[seqid]->SQ + matches[i].position + options->primer_length;
+		    	   else
+		    	   	   pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia=  seqdb[seqid]->SQ + matches[j].position - 1 ;
+
+		    	   pcurrent->pcr.ampcount++;
+//		    	   fprintf(stderr,"%c%c W1 : %s   direct : %c",
+//		    			   "bG"[(int)pcurrent->p1->good],
+//		    			   "bG"[(int)pcurrent->p2->good],
+//		    			   ecoUnhashWord(pcurrent->p1->word, options->primer_length),
+//		    			   "><"[(int)pcurrent->asdirect1]
+//		    			   );
+//
+//		    	   fprintf(stderr,"   W2 : %s   direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n",
+//		    			   ecoUnhashWord(pcurrent->p2->word, options->primer_length),
+//		    			   "><"[(int)pcurrent->asdirect2],
+//		    			   distance,
+//		    			   pcurrent->mind,pcurrent->maxd,
+//		    			   (pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0,
+//		    			   pcurrent->inexample,pcurrent->outexample,
+//		    			   " N"[(pcurrent->outexample+pcurrent->inexample)==1],
+//		    			   paircount
+//
+//		    			   );
+//
+
+		       }
+		 }
+	}
+
+   pairs->count=paircount;
+
 }
--- a/tools/ecoPCRFormat.py
+++ b/tools/ecoPCRFormat.py
@ -0,0 +1,651 @@
+#!/usr/bin/env python
+
+import re
+import gzip
+import struct
+import sys
+import time
+import getopt
+
+try:
+    import psycopg2
+    _dbenable=True
+except ImportError:
+    _dbenable=False
+
+#####
+#
+#
+# Generic file function
+#
+#
+#####
+
+def universalOpen(file):
+    if isinstance(file,str):
+        if file[-3:] == '.gz':
+            rep = gzip.open(file)
+        else:
+            rep = open(file)
+    else:
+        rep = file
+    return rep
+
+def universalTell(file):
+    if isinstance(file, gzip.GzipFile):
+        file=file.myfileobj
+    return file.tell()
+
+def fileSize(file):
+    if isinstance(file, gzip.GzipFile):
+        file=file.myfileobj
+    pos = file.tell()
+    file.seek(0,2)
+    length = file.tell()
+    file.seek(pos,0)
+    return length
+
+def progressBar(pos,max,reset=False,delta=[]):
+    if reset:
+        del delta[:]
+    if not delta:
+        delta.append(time.time())
+        delta.append(time.time())
+
+    delta[1]=time.time()
+    elapsed = delta[1]-delta[0]
+    percent = float(pos)/max * 100
+    remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
+    bar = '#' * int(percent/2)
+    bar+= '|/-\\-'[pos % 5]
+    bar+= ' ' * (50 - int(percent/2))
+    sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
+
+#####
+#
+#
+# NCBI Dump Taxonomy reader
+#
+#
+#####
+
+def endLessIterator(endedlist):
+    for x in endedlist:
+        yield x
+    while(1):
+        yield endedlist[-1]
+   
+class ColumnFile(object):
+    
+    def __init__(self,stream,sep=None,strip=True,types=None):
+        if isinstance(stream,str):
+            self._stream = open(stream)
+        elif hasattr(stream,'next'):
+            self._stream = stream
+        else:
+            raise ValueError,'stream must be string or an iterator'
+        self._delimiter=sep
+        self._strip=strip
+        if types:
+            self._types=[x for x in types]
+            for i in xrange(len(self._types)):
+                if self._types[i] is bool:
+                    self._types[i]=ColumnFile.str2bool
+        else:
+            self._types=None
+            
+    def str2bool(x):
+        return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
+                    
+    str2bool = staticmethod(str2bool)
+            
+        
+    def __iter__(self):
+        return self
+    
+    def next(self):
+        ligne = self._stream.next()
+        data = ligne.split(self._delimiter)
+        if self._strip or self._types:
+            data = [x.strip() for x in data]
+        if self._types:
+            it = endLessIterator(self._types)
+            data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
+        return data
+    
+def taxonCmp(t1,t2):
+    if t1[0] < t2[0]:
+        return -1
+    elif t1[0] > t2[0]:
+        return +1
+    return 0
+
+def bsearchTaxon(taxonomy,taxid):
+    taxCount = len(taxonomy)
+    begin = 0
+    end   = taxCount 
+    oldcheck=taxCount
+    check = begin + end / 2
+    while check != oldcheck and taxonomy[check][0]!=taxid :
+        if taxonomy[check][0] < taxid:
+            begin=check
+        else:
+            end=check
+        oldcheck=check
+        check = (begin + end) / 2
+        
+        
+    if taxonomy[check][0]==taxid:
+        return check
+    else:
+        return None
+        
+    
+    
+def readNodeTable(file):
+
+    file = universalOpen(file)
+    
+    nodes = ColumnFile(file, 
+                       sep='|', 
+                       types=(int,int,str,
+                              str,str,bool,
+                              int,bool,int,
+                              bool,bool,bool,str))
+    print >>sys.stderr,"Reading taxonomy dump file..."
+    taxonomy=[[n[0],n[2],n[1]] for n in nodes]
+    print >>sys.stderr,"List all taxonomy rank..."    
+    ranks =list(set(x[1] for x in taxonomy))
+    ranks.sort()
+    ranks = dict(map(None,ranks,xrange(len(ranks))))
+    
+    print >>sys.stderr,"Sorting taxons..."
+    taxonomy.sort(taxonCmp)
+
+    print >>sys.stderr,"Indexing taxonomy..."
+    index = {}
+    for t in taxonomy:
+        index[t[0]]=bsearchTaxon(taxonomy, t[0])
+    
+    print >>sys.stderr,"Indexing parent and rank..."
+    for t in taxonomy:
+        t[1]=ranks[t[1]]
+        t[2]=index[t[2]]
+        
+        
+    return taxonomy,ranks,index
+
+def nameIterator(file):
+    file = universalOpen(file)
+    names = ColumnFile(file, 
+                       sep='|', 
+                       types=(int,str,
+                              str,str))
+    for taxid,name,unique,classname,white in names:
+        yield taxid,name,classname
+        
+def mergedNodeIterator(file):
+    file = universalOpen(file)
+    merged = ColumnFile(file, 
+                       sep='|', 
+                       types=(int,int,str))
+    for taxid,current,white in merged:
+            yield taxid,current
+    
+def deletedNodeIterator(file):
+    file = universalOpen(file)
+    deleted = ColumnFile(file, 
+                       sep='|', 
+                       types=(int,str))
+    for taxid,white in deleted:
+            yield taxid
+    
+def readTaxonomyDump(taxdir):
+    taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
+    
+    print >>sys.stderr,"Adding scientific name..."
+
+    alternativeName=[]
+    for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
+        alternativeName.append((name,classname,index[taxid]))
+        if classname == 'scientific name':
+            taxonomy[index[taxid]].append(name)
+        
+    print >>sys.stderr,"Adding taxid alias..."
+    for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
+        index[taxid]=index[current]
+    
+    print >>sys.stderr,"Adding deleted taxid..."
+    for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
+        index[taxid]=None
+    
+    return taxonomy,ranks,alternativeName,index
+
+def readTaxonomyDB(dbname):
+    connection = psycopg2.connect(database=dbname)
+    
+    cursor = connection.cursor()
+    cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
+    taxonomy=[list(x) for x in cursor]
+
+    cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
+    ranks=cursor.fetchall()
+    ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks))))
+    
+    print >>sys.stderr,"Sorting taxons..."
+    taxonomy.sort(taxonCmp)
+
+    print >>sys.stderr,"Indexing taxonomy..."
+    index = {}
+    for t in taxonomy:
+        index[t[0]]=bsearchTaxon(taxonomy, t[0])
+    
+    print >>sys.stderr,"Indexing parent and rank..."
+    for t in taxonomy:
+        t[1]=ranks[t[1]]
+        try:
+            t[2]=index[t[2]]
+        except KeyError,e:
+            if t[2] is None and t[0]==1:
+                t[2]=index[t[0]]
+            else:
+                raise e
+        
+    cursor.execute("select taxid,name,category from ncbi_taxonomy.name")
+    
+    alternativeName=[]
+    for taxid,name,classname in cursor:
+        alternativeName.append((name,classname,index[taxid]))
+        if classname == 'scientific name':
+            taxonomy[index[taxid]].append(name)
+    
+    cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")
+ 
+    print >>sys.stderr,"Adding taxid alias..."
+    for taxid,current in cursor:
+        if current is not None:
+            index[taxid]=index[current]
+        else:
+            index[taxid]=None
+            
+       
+    return taxonomy,ranks,alternativeName,index
+    
+#####
+#
+#
+#  Genbank/EMBL sequence reader
+#
+#
+#####
+
+def entryIterator(file):
+    file = universalOpen(file)
+    rep =[]
+    for ligne in file:
+        rep.append(ligne)
+        if ligne == '//\n':
+            rep = ''.join(rep)
+            yield rep
+            rep = []
+            
+def fastaEntryIterator(file):
+    file = universalOpen(file)
+    rep =[]
+    for ligne in file:
+        if ligne[0] == '>' and rep:
+            rep = ''.join(rep)
+            yield rep
+            rep = []
+        rep.append(ligne)
+    if rep:
+        rep = ''.join(rep)
+        yield rep
+    
+_cleanSeq = re.compile('[ \n0-9]+')
+            
+def cleanSeq(seq):
+    return _cleanSeq.sub('',seq)
+    
+    
+_gbParseID = re.compile('(?<=^LOCUS {7})[^ ]+(?= )',re.MULTILINE)   
+_gbParseDE = re.compile('(?<=^DEFINITION {2}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)   
+_gbParseSQ = re.compile('(?<=^ORIGIN).+?(?=^//$)',re.MULTILINE+re.DOTALL)  
+_gbParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')
+  
+def genbankEntryParser(entry):
+    Id = _gbParseID.findall(entry)[0]
+    De = ' '.join(_gbParseDE.findall(entry)[0].split())
+    Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper())
+    try:
+        Tx = int(_gbParseTX.findall(entry)[0])
+    except IndexError:
+        Tx = None
+    return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
+
+######################
+
+_cleanDef = re.compile('[\nDE]')
+
+def cleanDef(definition):
+    return _cleanDef.sub('',definition)
+
+_emblParseID = re.compile('(?<=^ID {3})[^ ]+(?=;)',re.MULTILINE)   
+_emblParseDE = re.compile('(?<=^DE {3}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)   
+_emblParseSQ = re.compile('(?<=^  ).+?(?=^//$)',re.MULTILINE+re.DOTALL)  
+_emblParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')
+
+def emblEntryParser(entry):
+    Id = _emblParseID.findall(entry)[0]
+    De = ' '.join(cleanDef(_emblParseDE.findall(entry)[0]).split())
+    Sq = cleanSeq(_emblParseSQ.findall(entry)[0].upper())
+    try:
+        Tx = int(_emblParseTX.findall(entry)[0])
+    except IndexError:
+        Tx = None
+    return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
+
+
+######################
+
+_fastaSplit=re.compile(';\W*')
+
+def parseFasta(seq):
+    seq=seq.split('\n')
+    title = seq[0].strip()[1:].split(None,1)
+    id=title[0]
+    if len(title) == 2:
+        field = _fastaSplit.split(title[1])
+    else:
+        field=[]
+    info = dict(x.split('=',1) for x in field if '=' in x)
+    definition = ' '.join([x for x in field if '=' not in x])
+    seq=(''.join([x.strip() for x in seq[1:]])).upper()   
+    return id,seq,definition,info
+
+  
+def fastaEntryParser(entry):
+    id,seq,definition,info = parseFasta(entry)
+    Tx = info.get('taxid',None)   
+    if Tx is not None:
+        Tx=int(Tx)
+    return {'id':id,'taxid':Tx,'definition':definition,'sequence':seq}
+    
+
+def sequenceIteratorFactory(entryParser,entryIterator):
+    def sequenceIterator(file):
+        for entry in entryIterator(file):
+            yield entryParser(entry)
+    return sequenceIterator
+
+
+def taxonomyInfo(entry,connection):
+    taxid = entry['taxid']
+    curseur = connection.cursor()
+    curseur.execute("""
+                        select taxid,species,genus,family,
+                               taxonomy.scientificName(taxid) as sn,
+                               taxonomy.scientificName(species) as species_sn,
+                               taxonomy.scientificName(genus) as genus_sn,
+                               taxonomy.scientificName(family) as family_sn
+                        from
+                            (   
+                             select alias                      as taxid,
+                               taxonomy.getSpecies(alias) as species,
+                               taxonomy.getGenus(alias)   as genus,
+                               taxonomy.getFamily(alias)  as family
+                                from taxonomy.aliases
+                               where id=%d ) as tax
+                    """ % taxid)
+    rep = curseur.fetchone()
+    entry['current_taxid']=rep[0]
+    entry['species']=rep[1]
+    entry['genus']=rep[2]
+    entry['family']=rep[3]
+    entry['scientific_name']=rep[4]
+    entry['species_sn']=rep[5]
+    entry['genus_sn']=rep[6]
+    entry['family_sn']=rep[7]
+    return entry
+    
+#####
+#
+#
+# Binary writer
+#
+#
+#####
+    
+def ecoSeqPacker(sq):
+    
+    compactseq = gzip.zlib.compress(sq['sequence'],9)
+    cptseqlength  = len(compactseq)
+    delength   = len(sq['definition'])
+    
+    totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength
+    
+    packed = struct.pack('> I I 20s I I I %ds %ds' % (delength,cptseqlength),
+                         totalSize,
+                         sq['taxid'],
+                         sq['id'],
+                         delength,
+                         len(sq['sequence']),
+                         cptseqlength,
+                         sq['definition'],
+                         compactseq)
+    
+    assert len(packed) == totalSize+4, "error in sequence packing"
+    
+    return packed
+
+def ecoTaxPacker(tx):
+    
+    namelength = len(tx[3])
+    
+    totalSize = 4 + 4 + 4 + 4 + namelength
+    
+    packed = struct.pack('> I I I I I %ds' % namelength, 
+                         totalSize, 
+                         tx[0],
+                         tx[1],
+                         tx[2], 
+                         namelength,
+                         tx[3])
+    
+    return packed
+
+def ecoRankPacker(rank):
+    
+    namelength = len(rank)
+    
+    packed = struct.pack('> I %ds' % namelength,
+                         namelength,
+                         rank)
+    
+    return packed
+                
+def ecoNamePacker(name):
+    
+    namelength = len(name[0])
+    classlength= len(name[1])
+    totalSize =  namelength + classlength + 4 + 4 + 4 + 4
+    
+    packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
+                         totalSize,
+                         int(name[1]=='scientific name'),
+                         namelength,
+                         classlength,
+                         name[2],
+                         name[0],
+                         name[1])
+    
+    return packed
+    
+def ecoSeqWriter(file,input,taxindex,parser):
+    output = open(file,'wb')
+    input  = universalOpen(input)
+    inputsize = fileSize(input)
+    entries = parser(input)
+    seqcount=0
+    skipped = []
+
+    output.write(struct.pack('> I',seqcount))
+    
+    progressBar(1, inputsize,reset=True)
+    for entry in entries:
+        if entry['taxid'] is not None:
+            try:
+                entry['taxid']=taxindex[entry['taxid']]
+            except KeyError:
+                entry['taxid']=None
+            if entry['taxid'] is not None:
+                seqcount+=1
+                output.write(ecoSeqPacker(entry))
+            else:
+                skipped.append(entry['id'])
+            where = universalTell(input)
+            progressBar(where, inputsize)
+            print >>sys.stderr," Readed sequences : %d     " % seqcount,
+        else:
+            skipped.append(entry['id'])
+        
+    print >>sys.stderr
+    output.seek(0,0)
+    output.write(struct.pack('> I',seqcount))
+    
+    output.close()
+    return skipped
+        
+
+def ecoTaxWriter(file,taxonomy):
+    output = open(file,'wb')
+    output.write(struct.pack('> I',len(taxonomy)))
+    
+    for tx in taxonomy:
+        output.write(ecoTaxPacker(tx))
+
+    output.close()
+    
+def ecoRankWriter(file,ranks):
+    output = open(file,'wb')
+    output.write(struct.pack('> I',len(ranks)))
+
+    rankNames = ranks.keys()
+    rankNames.sort()
+    
+    for rank in rankNames:
+        output.write(ecoRankPacker(rank))
+
+    output.close()
+
+def nameCmp(n1,n2):
+    name1=n1[0].upper()
+    name2=n2[0].upper()
+    if name1 < name2:
+        return -1
+    elif name1 > name2:
+        return 1
+    return 0
+
+
+def ecoNameWriter(file,names):
+    output = open(file,'wb')
+    output.write(struct.pack('> I',len(names)))
+
+    names.sort(nameCmp)
+    
+    for name in names:
+        output.write(ecoNamePacker(name))
+
+    output.close()
+    
+def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
+    
+    ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
+    ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
+    ecoNameWriter('%s.ndx' % prefix, taxonomy[2])
+  
+    filecount = 0
+    for filename in seqFileNames:
+        filecount+=1
+        sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount), 
+                     filename, 
+                     taxonomy[3], 
+                     parser)
+        if sk:
+            print >>sys.stderr,"Skipped entry :"
+            print >>sys.stderr,sk
+        
+def ecoParseOptions(arguments):
+    opt = {
+            'prefix' : 'ecodb',
+            'taxdir' : 'taxdump',
+            'parser' : sequenceIteratorFactory(genbankEntryParser,
+                                                  entryIterator)
+           }
+    
+    o,filenames = getopt.getopt(arguments,
+                                'ht:T:n:gfe',
+                                ['help',
+                                 'taxonomy=',
+                                 'taxonomy_db=',
+                                 'name=',
+                                 'genbank',
+                                 'fasta',
+                                 'embl'])
+    
+    for name,value in o:
+        if name in ('-h','--help'):
+            printHelp()
+            exit()
+        elif name in ('-t','--taxonomy'):
+            opt['taxmod']='dump'
+            opt['taxdir']=value
+        elif name in ('-T','--taxonomy_db'):
+            opt['taxmod']='db'
+            opt['taxdb']=value
+        elif name in ('-n','--name'):
+            opt['prefix']=value
+        elif name in ('-g','--genbank'):
+            opt['parser']=sequenceIteratorFactory(genbankEntryParser,
+                                                  entryIterator)
+
+        elif name in ('-f','--fasta'):
+            opt['parser']=sequenceIteratorFactory(fastaEntryParser,
+                                                  fastaEntryIterator)
+            
+        elif name in ('-e','--embl'):
+            opt['parser']=sequenceIteratorFactory(emblEntryParser,
+                                                  entryIterator)
+        else:
+            raise ValueError,'Unknown option %s' % name
+
+    return opt,filenames
+
+def printHelp():
+    print "-----------------------------------"
+    print " ecoPCRFormat.py"
+    print "-----------------------------------"
+    print "ecoPCRFormat.py [option] <argument>"
+    print "-----------------------------------"
+    print "-e    --embl        :[E]mbl format"
+    print "-f    --fasta       :[F]asta format"
+    print "-g    --genbank     :[G]enbank format"
+    print "-h    --help        :[H]elp - print this help"
+    print "-n    --name        :[N]ame of the new database created"
+    print "-t    --taxonomy    :[T]axonomy - path to the taxonomy database"
+    print "                    :bcp-like dump from GenBank taxonomy database."
+    print "-----------------------------------"
+
+if __name__ == '__main__':
+    
+    opt,filenames = ecoParseOptions(sys.argv[1:])
+    
+    if opt['taxmod']=='dump':
+        taxonomy = readTaxonomyDump(opt['taxdir'])
+    elif opt['taxmod']=='db':
+        taxonomy = readTaxonomyDB(opt['taxdb'])
+        
+   
+    ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
+