ecoprimers/src/ecoprimer.c

/*
 * ecoprimer.c
 *
 *  Created on: 7 nov. 2008
 *      Author: coissac
 */

#include "libecoprimer/ecoprimer.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <getopt.h>
#include <time.h>
#include <sys/time.h>

#define VERSION "0.1"
  /* TR: by default, statistics are made on species level*/
#define DEFULTTAXONRANK "species"

/* ----------------------------------------------- */
/* printout help                                   */
/* ----------------------------------------------- */
#define PP fprintf(stdout,

static void PrintHelp()
{
        PP      "------------------------------------------\n");
        PP      " ecoPrimer Version %s\n", VERSION);
        PP      "------------------------------------------\n");
}

/*
 * ExitUsage: print the command-line synopsis on standard output.
 *
 * stat : when non-zero the process terminates with this exit status after
 *        printing; when zero the function simply returns to its caller.
 *
 * The option list matches the getopt() string used by main(); the
 * previously advertised "-R rank" option is not accepted there and has been
 * dropped, and the options that were missing from the synopsis are listed.
 */
static void ExitUsage(int stat)
{
        fprintf(stdout, "usage: ecoprimer [-d database] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-t taxon level]\n");
        fprintf(stdout, "                 [-q value] [-s value] [-x value] [-3 value] [-c] [-D | -S] [-U]\n");
        fprintf(stdout, "type \"ecoprimer -h\" for help\n");

        if (stat)
            exit(stat);
}

#undef  PP

/*
 * initoptions: store the built-in default of every tunable in *options.
 * main() calls this before parsing the command line, so each value below is
 * what the program uses when the matching switch is not given.
 */
void initoptions(poptions_t options)
{
	/* database prefix: none until -d is seen */
	options->prefix = NULL;

	/* amplifia length bounds (minimal, maximal) */
	options->lmin = 0;
	options->lmax = 0;

	/* primer search */
	options->primer_length      = 18;    /* minimal length of the primers */
	options->error_max          = 3;     /* maximum error count in fuzzy search */
	options->strict_three_prime = 2;
	options->no_multi_match     = FALSE;

	/* sequence topology and strandedness */
	options->circular     = 0;
	options->doublestrand = 1;

	/* quorums */
	options->strict_quorum         = 0.7;
	options->strict_exclude_quorum = 0.1;
	options->sensitivity_quorum    = 0.9;
	options->false_positive_quorum = 0.1;

	/* taxid filters: both tables start empty, r and g are their entry counts */
	options->restricted_taxid = NULL;    /* limit amplification below these taxid */
	options->r                = 0;
	options->ignored_taxid    = NULL;    /* no amplification below these taxid */
	options->g                = 0;

	/* taxon level for results, species by default */
	strcpy(options->taxonrank, DEFULTTAXONRANK);
}

/*
 * printcurrenttime: write the current time to stderr as
 * "#<seconds since the epoch>#, ddd yyyy-mm-dd hh:mm:ss zzz".
 *
 * The epoch value is converted to long long before printing: time_t has no
 * printf conversion of its own, and passing it to "%d" is a format/argument
 * mismatch wherever time_t is wider than int.
 */
void printcurrenttime ()
{
    time_t     now;
    struct tm  *ts;
    char       buf[80];

    /* Get the current time */
    now = time(NULL);

    /* Format the time, "ddd yyyy-mm-dd hh:mm:ss zzz"; fall back to an empty
     * string if the time cannot be broken down or does not fit the buffer. */
    ts = localtime(&now);
    if (ts == NULL
        || strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts) == 0)
        buf[0] = '\0';

    fprintf(stderr,"#%lld#, %s\n",(long long)now, buf);
}

/*
 * printcurrenttimeinmilli: write the local wall-clock time to stderr as
 * " h:mm:ss <microseconds> <milliseconds> ".
 *
 * The sub-second fields are printed through long with "%ld" because
 * tv_usec is not an int. The timezone out-parameter of gettimeofday() was
 * never read, so NULL is passed instead.
 */
void printcurrenttimeinmilli()
{
     struct timeval tv;
     struct tm *tm;
     time_t secs;

     gettimeofday(&tv, NULL);

     /* copy into a real time_t so the pointer given to localtime() has the
      * exact type it expects */
     secs = tv.tv_sec;
     tm = localtime(&secs);
     if (tm == NULL)
         return;               /* nothing sensible to print */

     fprintf(stderr, " %d:%02d:%02d %ld %ld \n", tm->tm_hour, tm->tm_min,
              tm->tm_sec, (long)tv.tv_usec, (long)(tv.tv_usec/1000));
}

/*
 * printpairs: write the primer-pair report to stdout as tab-separated text.
 *
 * A header line is printed first, with the two rank-dependent columns named
 * after options->taxonrank. One line then follows for every pair whose
 * inexample count reaches 70% of seqdbsize; pairs below that are skipped.
 *
 *   pairs       : table of pairs to report (paircount entries)
 *   options     : supplies primer_length and taxonrank
 *   rankdbstats : denominator of the first percentage column
 *   seqdbsize   : number of sequences in the database
 *
 * NOTE(review): the 70% threshold is a literal here, it is not read from
 * options.
 */
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
{
	uint32_t idx;
	uint32_t primerlen = options->primer_length;
	uint32_t minseqs   = seqdbsize * 70 / 100;
	double   sensitivity;
	double   specificity;
	float    meanlen;

	printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);

	for (idx = 0; idx < pairs.paircount; idx++)
	{
		if (minseqs <= pairs.pairs[idx].inexample)
		{
			/* both ratios are expressed as percentages */
			sensitivity = (double)pairs.pairs[idx].taxsetindex / rankdbstats * 100;
			specificity = (double)pairs.pairs[idx].oktaxoncount / pairs.pairs[idx].taxsetindex * 100;
			/* midpoint of the two length bounds */
			meanlen = (pairs.pairs[idx].mind + pairs.pairs[idx].maxd) / 2.0;

			/* The two words are printed by separate calls on purpose:
			 * ecoUnhashWord may hand back the same buffer each time
			 * -- TODO confirm before merging them. */
			printf("P1\t%s", ecoUnhashWord(pairs.pairs[idx].w1, primerlen));
			printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[idx].w2, primerlen));

			printf("\t%d\t%3.2f\t%3.2f\t%d\t%d\t%3.2f\n",
			       pairs.pairs[idx].inexample,
			       sensitivity,
			       specificity,
			       pairs.pairs[idx].mind,
			       pairs.pairs[idx].maxd,
			       meanlen);
		}
	}
}

/*
 * updateseqparams: tag every sequence of the database.
 *
 * For each of the seqdbsize entries of seqdb:
 *   - isexample is set from isGoodTaxon(), and *insamples or *outsamples is
 *     incremented accordingly (the counters are not reset here, the caller
 *     provides their starting values);
 *   - the taxon of the sequence is looked up and, when
 *     eco_findtaxonatrank() returns a taxon for options->taxonrankidx, its
 *     taxid is stored in ranktaxonid. When either lookup fails ranktaxonid
 *     is left untouched.
 */
void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, 
		poptions_t options, int32_t *insamples, int32_t *outsamples)
{
	uint32_t  n;
	int32_t   seqtaxid;
	ecotx_t  *node;

	for (n = 0; n < seqdbsize; n++)
	{
		seqdb[n]->isexample = isGoodTaxon(taxonomy, seqdb[n]->taxid, options);
		if (!seqdb[n]->isexample)
			++*outsamples;
		else
			++*insamples;

		/* seqdb[n]->taxid is used as an index into the taxon table */
		seqtaxid = taxonomy->taxons->taxon[seqdb[n]->taxid].taxid;

		node = eco_findtaxonbytaxid(taxonomy, seqtaxid);
		if (node == NULL)
			continue;

		node = eco_findtaxonatrank(node, options->taxonrankidx);
		if (node == NULL)
			continue;

		seqdb[n]->ranktaxonid = node->taxid;
	}
}

/*
 * setresulttaxonrank: resolve the rank name held in options->taxonrank to
 * its index in taxonomy->ranks and store it in options->taxonrankidx.
 *
 * The comparison is an exact strcmp() against each rank label. When no
 * label matches, a message is written to stderr and the process terminates
 * with a failure status (it used to exit with 0, which reported success to
 * the calling shell on an error path).
 */
void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
{
	int32_t i;

	/* look for the rank for which results are to be given */
	for (i = 0; i < taxonomy->ranks->count; i++)
	{
		if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0)
		{
			options->taxonrankidx = i;
			return;
		}
	}

	fprintf(stderr,"\nUnknown taxon level: '%s'\n", options->taxonrank);
	exit(EXIT_FAILURE);
}
/*
 * getrankdbstats: count the distinct taxa, at the rank selected by
 * options->taxonrankidx, that the sequences of the database map to
 * (totals of species, genus, etc.).
 *
 * Each sequence is resolved with eco_findtaxonbytaxid() followed by
 * eco_findtaxonatrank(); sequences for which either lookup returns NULL are
 * not counted. Taxids already met are kept in a temporary table that grows
 * by 500 entries at a time and is released before returning.
 *
 * Returns the number of distinct taxids collected.
 */
int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, 
		poptions_t options)
{
	uint32_t  seq;
	uint32_t  slot;
	uint32_t  capacity = 500;
	uint32_t  used = 0;
	int32_t  *seen = ECOMALLOC(capacity * sizeof(int32_t), "Error in taxon rank allocation");
	int32_t   seqtaxid;
	ecotx_t  *node;

	for (seq = 0; seq < seqdbsize; seq++)
	{
		seqtaxid = taxonomy->taxons->taxon[seqdb[seq]->taxid].taxid;

		node = eco_findtaxonbytaxid(taxonomy, seqtaxid);
		if (!node)
			continue;

		node = eco_findtaxonatrank(node, options->taxonrankidx);
		if (!node)
			continue;

		/* linear scan: stop on the first slot holding this taxid */
		slot = 0;
		while (slot < used && seen[slot] != node->taxid)
			slot++;

		if (slot != used)
			continue;        /* already recorded, nothing to add */

		if (used == capacity)
		{
			capacity += 500;
			seen = ECOREALLOC(seen, capacity * sizeof(int32_t), "Cannot allocate pair rank taxon table");
		}

		seen[used] = node->taxid;
		used++;
	}

	ECOFREE(seen, "free rank taxon table");

	return used;
}

/*
 * setoktaxforspecificity: update oktaxoncount of every primer pair.
 *
 * For each pair, every amplifia pointer stored in each taxset entry is
 * looked up in the ampset table of the same pair. Entries are matched by
 * pointer identity, because equal strings share one pointer in both sets.
 * A tally is built from the matching ampset entries:
 *   - an entry with seqidindex > 1 adds seqidindex to the tally and ends
 *     the scan;
 *   - otherwise the tally grows by one whenever taxonids[0] differs from
 *     the first non-zero taxid remembered so far;
 *   - the scan also ends as soon as the tally exceeds one.
 * oktaxoncount is incremented once for every amplifia whose tally is
 * exactly one. It is not reset here.
 */
void setoktaxforspecificity (ppairscount_t pairs)
{
	uint32_t p;          /* pair index                    */
	uint32_t t;          /* taxset index within the pair  */
	uint32_t a;          /* amplifia index within taxset  */
	uint32_t s;          /* ampset index within the pair  */
	int      hits;
	int32_t  firsttaxid;

	for (p = 0; p < pairs->paircount; p++)
	{
		for (t = 0; t < pairs->pairs[p].taxsetindex; t++)
		{
			for (a = 0; a < pairs->pairs[p].taxset[t].amplifiaindex; a++)
			{
				firsttaxid = 0;
				hits = 0;

				for (s = 0; s < pairs->pairs[p].ampsetindex; s++)
				{
					/* pointer comparison only, see the header comment */
					if (pairs->pairs[p].taxset[t].amplifia[a] != pairs->pairs[p].ampset[s].amplifia)
						continue;

					if (pairs->pairs[p].ampset[s].seqidindex > 1)
					{
						hits += pairs->pairs[p].ampset[s].seqidindex;
						break;
					}

					if (firsttaxid != pairs->pairs[p].ampset[s].taxonids[0])
					{
						if (firsttaxid == 0)
							firsttaxid = pairs->pairs[p].ampset[s].taxonids[0];
						hits++;
					}

					if (hits > 1)
						break;
				}

				if (hits == 1)
					pairs->pairs[p].oktaxoncount++;
			}
		}
	}
}

/*
 * main: command-line entry point.
 *
 * Steps, in order:
 *   1. load the default options and parse the command line;
 *   2. read the taxonomy and resolve the requested result rank;
 *   3. read the sequence database, tag each sequence and count the distinct
 *      taxa at the result rank;
 *   4. look for strict primers (optionally filtered with -U), encode the
 *      sequences, then look for approximate primers;
 *   5. build the primer pairs, compute their oktaxoncount and print the
 *      report on stdout.
 * Progress and diagnostics go to stderr.
 *
 * Command-line errors are now fatal. An unknown option, a numeric argument
 * that cannot be parsed, or a missing -d database prefix prints the usage
 * synopsis and exits with status 1. Before this change errflag was counted
 * but never read, and a missing -d handed a NULL prefix to the database
 * readers.
 *
 * Returns 0 on normal completion.
 */
int main(int argc, char **argv)
{
	pecodnadb_t   seqdb; /* of type ecoseq_t */
	uint32_t        seqdbsize=0;
	ecotaxonomy_t *taxonomy;

	options_t     options;
	int           carg;
	int32_t       errflag=0;

	int32_t       insamples=0;
	int32_t       outsamples=0;
	uint32_t        i;
	
	pwordcount_t  words;
	pprimercount_t     primers;
	pairscount_t		pairs;
	
	int32_t		  rankdbstats = 0;

	initoptions(&options);

    while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) {

     /* sscanf() result for options carrying a number; stays 1 otherwise */
     int parsed = 1;

     switch (carg) {
                                /* -------------------- */
        case 'd':               /* database name        */
                                /* -------------------- */
        	options.prefix = ECOMALLOC(strlen(optarg)+1,
                              "Error on prefix allocation");
           strcpy(options.prefix,optarg);
           break;

                                /* -------------------- */
        case 'h':               /* help                 */
                                /* -------------------- */
           PrintHelp();
           exit(0);
           break;

                                /* ------------------------- */
        case 'l':               /* min amplification length  */
                                /* ------------------------- */
           parsed = sscanf(optarg,"%d",&(options.lmin));
           break;

                                /* -------------------------- */
        case 'L':               /* max amplification length   */
                                /* -------------------------- */
          parsed = sscanf(optarg,"%d",&(options.lmax));
          break;

                                /* -------------------- */
        case 'e':               /* error max            */
                                /* -------------------- */
          parsed = sscanf(optarg,"%d",&(options.error_max));
          break;


								/* ------------------------ */
		case '3':               /* three prime strict match */
                                /* ------------------------ */
			parsed = sscanf(optarg,"%d",&(options.strict_three_prime));
			break;

                                /* ---------------------- */
        case 'q':               /* strict matching quorum */
                                /* ---------------------- */
          parsed = sscanf(optarg,"%f",&(options.strict_quorum));
          break;

								/* ------------------ */
        case 's':               /* sensitivity quorum */
								/* ------------------ */
        	parsed = sscanf(optarg,"%f",&(options.sensitivity_quorum));
        	break;

        						/* -------------------------------- */
        case 't':               /* required taxon level for results */
        						/* -------------------------------- */
        	strncpy(options.taxonrank, optarg, 19);
        	options.taxonrank[19] = 0;
            break;
        	
        						/* --------------------- */
        case 'x':               /* false positive quorum */
								/* --------------------- */
        	parsed = sscanf(optarg,"%f",&(options.false_positive_quorum));
        	break;

							    /* ---------------------------- */
		case 'D':               /* set in double strand mode    */
							    /* ---------------------------- */
			options.doublestrand=1;
			break;

								/* ---------------------------- */
		case 'S':               /* set in single strand mode    */
								/* ---------------------------- */
			options.doublestrand=0;
			break;

			                    /* ---------------------------- */
        case 'U':               /* set the no_multi_match flag  */
			                    /* ---------------------------- */
            options.no_multi_match=TRUE;
            break;

         						/* ------------------------------------------ */
        case 'r':               /* stores the restricting search taxonomic id */
        						/* ------------------------------------------ */
          options.restricted_taxid = ECOREALLOC(options.restricted_taxid,sizeof(int32_t)*(options.r+1),
          									"Error on restricted_taxid reallocation");
          parsed = sscanf(optarg,"%d",&(options.restricted_taxid[options.r]));
          options.r++;
          break;

         						/* --------------------------------- */
        case 'i':               /* stores the taxonomic id to ignore */
        						/* --------------------------------- */
          options.ignored_taxid = ECOREALLOC(options.ignored_taxid,sizeof(int32_t)*(options.g+1),
          									"Error on excluded_taxid reallocation");
          parsed = sscanf(optarg,"%d",&(options.ignored_taxid[options.g]));
          options.g++;
          break;

                                /* ---------------------- */
        case 'c':               /* sequences are circular */
        						/* ---------------------- */
		  options.circular = 1;
          break;

                                /* -------------------- */
        case '?':               /* bad option           */
                                /* -------------------- */
            errflag++;
            break;
   		}

     /* a numeric option whose argument did not convert is a usage error */
     if (parsed != 1)
     {
    	 fprintf(stderr,"Invalid value '%s' for option -%c\n", optarg, carg);
    	 errflag++;
     }

	}

    /* the database prefix is handed to both readers below, so it is required */
    if (options.prefix == NULL)
    {
    	fprintf(stderr,"No database specified (option -d)\n");
    	errflag++;
    }

    if (errflag)
    	ExitUsage(1);           /* prints the synopsis and exits */

    fprintf(stderr,"Reading taxonomy database ...");
    taxonomy = read_taxonomy(options.prefix,0);
    fprintf(stderr,"Ok\n");
    
    setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/
    
    fprintf(stderr,"Reading sequence database ...\n");

    seqdb = readdnadb(options.prefix,&seqdbsize);

    fprintf(stderr,"Ok\n");
    fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);
    
    updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples);

    rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options);

    fprintf(stderr,"Database is constituted of %5d examples\n",insamples);
    fprintf(stderr,"                       and %5d counterexamples\n",outsamples);
    fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats);

    fprintf(stderr,"\nIndexing words in sequences\n");

    printcurrenttimeinmilli();
    words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
    printcurrenttimeinmilli();
    
    fprintf(stderr,"\n  Strict primer count : %d\n",words->size);

    if (options.no_multi_match)
    {
    	(void)filterMultiStrictPrimer(words);
    	fprintf(stderr,"\n  Strict primer with single match count : %d\n",words->size);
    }


    fprintf(stderr,"\n\n  Primer sample : \n");
    for (i=0; i<MINI(10,words->size); i++)
    	fprintf(stderr,"  + Primer : %s   sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);


    fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");
    for (i=0;i<seqdbsize;i++)
    {
    	encodeSequence(seqdb[i]);
    	fprintf(stderr,"  Encoded sequences %5d/%5d          \r",(int32_t)i+1,(int32_t)seqdbsize);
    }

    /* the strict counts are released before the approximate search runs */
    ECOFREE(words->strictcount,"Free strict primer count table");

    primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);

    ECOFREE(words->words,"Free strict primer table");
    ECOFREE(words,"Free strict primer structure");
    fprintf(stderr,"\n\n  Approximate repeats :%d \n", primers->size);

    fprintf(stderr,"\n\n  Primer sample : \n");
    for (i=0; i<MINI(10,primers->size); i++)
    	fprintf(stderr,"  + Primer : %s   example sequence count : %5d counterexample sequence count : %5d  status : %s\n",ecoUnhashWord(primers->primers[i].word,options.primer_length),
                       primers->primers[i].inexample,
                       primers->primers[i].outexample,
                       primers->primers[i].good ? "good":"bad");

    fprintf(stderr,"\n");

    /*TR: Added*/
    pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
    setoktaxforspecificity (&pairs);
    
    printpairs (pairs, &options, rankdbstats, seqdbsize);
    
    
    ECOFREE(pairs.pairs,"Free pairs table");

    return 0;
}
New version based on sort them merge algorithm git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@180 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2009-03-04 22:32:55 +00:00			`/*`
			`* ecoprimer.c`
			`*`
			`* Created on: 7 nov. 2008`
			`* Author: coissac`
			`*/`

			`#include "libecoprimer/ecoprimer.h"`
			`#include <stdio.h>`
			`#include <string.h>`
			`#include <ctype.h>`
			`#include <stdlib.h>`
			`#include <getopt.h>`
			`#include <time.h>`
			`#include <sys/time.h>`

			`#define VERSION "0.1"`
			`/* TR: by default, statistics are made on species level*/`
			`#define DEFULTTAXONRANK "species"`

			`/* ----------------------------------------------- */`
			`/* printout help */`
			`/* ----------------------------------------------- */`
			`#define PP fprintf(stdout,`

			`static void PrintHelp()`
			`{`
			`PP "------------------------------------------\n");`
			`PP " ecoPrimer Version %s\n", VERSION);`
			`PP "------------------------------------------\n");`
			`}`

			`static void ExitUsage(int stat)`
			`{`
			`PP "usage: ecoprimer [-d database] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-R rank] [-t taxon level]\n");`
			`PP "type \"ecoprimer -h\" for help\n");`

			`if (stat)`
			`exit(stat);`
			`}`

			`#undef PP`

			`void initoptions(poptions_t options)`
			`{`
			`options->lmin=0; //< Amplifia minimal length`
			`options->lmax=0; //< Amplifia maximal length`
			`options->error_max=3; //**< maximum error count in fuzzy search`
			`options->primer_length=18; //**< minimal length of the primers`
			`options->restricted_taxid=NULL; //**< limit amplification below these taxid`
			`options->ignored_taxid=NULL; //**< no amplification below these taxid`
			`options->prefix=NULL;`
			`options->circular=0;`
			`options->doublestrand=1;`
			`options->strict_quorum=0.7;`
			`options->strict_exclude_quorum=0.1;`
			`options->sensitivity_quorum=0.9;`
			`options->false_positive_quorum=0.1;`
			`options->strict_three_prime=2;`
			`options->r=0;`
			`options->g=0;`
			`options->no_multi_match=FALSE;`
			`strcpy(options->taxonrank, DEFULTTAXONRANK); /*taxon level for results, species by default*/`
			`}`

			`void printcurrenttime ()`
			`{`
			`time_t now;`
			`struct tm *ts;`
			`char buf[80];`

			`/* Get the current time */`
			`now = time(NULL);`

			`/* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */`
			`ts = localtime(&now);`
			`strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts);`
			`fprintf(stderr,"#%d#, %s\n",now, buf);`
			`}`

			`void printcurrenttimeinmilli()`
			`{`
			`struct timeval tv;`
			`struct timezone tz;`
			`struct tm *tm;`
			`gettimeofday(&tv, &tz);`
			`tm=localtime(&tv.tv_sec);`
			`fprintf(stderr, " %d:%02d:%02d %d %d \n", tm->tm_hour, tm->tm_min,`
			`tm->tm_sec, tv.tv_usec, tv.tv_usec/1000);`
			`}`

			`/*TR: Added*/`
			`void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)`
			`{`
			`uint32_t i;`
			`uint32_t wordsize = options->primer_length;`
			`uint32_t quorumseqs;`
			`double sens;`
			`double speci;`
			`float avg;`

			`quorumseqs = seqdbsize * 70 / 100;`

			`printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);`

			`for (i=0; i < pairs.paircount; i++)`
			`{`
			`if (quorumseqs > pairs.pairs[i].inexample) continue;`
			`sens = (pairs.pairs[i].taxsetindex*1.0)/rankdbstats*100;`
			`speci = (pairs.pairs[i].oktaxoncount*1.0)/pairs.pairs[i].taxsetindex*100;`
			`avg = (pairs.pairs[i].mind+pairs.pairs[i].maxd)*1.0/2;`

			`printf("P1\t%s", ecoUnhashWord(pairs.pairs[i].w1, wordsize));`
			`printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[i].w2, wordsize));`
			`printf("\t%d", pairs.pairs[i].inexample);`
			`printf("\t%3.2f", sens);`
			`printf("\t%3.2f", speci);`
			`printf("\t%d", pairs.pairs[i].mind);`
			`printf("\t%d", pairs.pairs[i].maxd);`
			`printf("\t%3.2f\n", avg);`
			`}`
			`}`

			`/*updateseqparams: This function counts the insample and outsample sequences`
			`* and with each sequences adds a tag of the taxon to which the sequence beongs*/`
			`void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,`
			`poptions_t options, int32_t *insamples, int32_t *outsamples)`
			`{`
			`uint32_t i;`
			`int32_t taxid;`
			`ecotx_t *tmptaxon;`

			`for (i=0;i<seqdbsize;i++)`
			`{`
			`seqdb[i]->isexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options);`
			`if (seqdb[i]->isexample)`
			`(*insamples)++;`
			`else`
			`(*outsamples)++;`

			`taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid;`
			`tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);`
			`if (tmptaxon)`
			`tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx);`
			`if (tmptaxon)`
			`seqdb[i]->ranktaxonid = tmptaxon->taxid;`
			`}`
			`}`

			`void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)`
			`{`
			`int32_t i;`

			`/*set taxon rank for which result is to be given*/`
			`for (i = 0; i < taxonomy->ranks->count; i++)`
			`{`
			`if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0)`
			`{`
			`options->taxonrankidx = i;`
			`break;`
			`}`
			`}`

			`if (i == taxonomy->ranks->count)`
			`{`
			`fprintf(stderr,"\nUnknown taxon level: '%s'\n", options->taxonrank);`
			`exit(0);`
			`}`
			`}`
			`/* to get db stats, totals of species, genus etc....*/`
			`int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,`
			`poptions_t options)`
			`{`
			`uint32_t i;`
			`uint32_t j;`
			`uint32_t nameslots = 500;`
			`uint32_t namesindex = 0;`
			`int32_t *ranktaxonids = ECOMALLOC(nameslots * sizeof(int32_t), "Error in taxon rank allocation");`
			`int32_t taxid;`

			`ecotx_t *tmptaxon;`

			`for (i=0;i<seqdbsize;i++)`
			`{`
			`taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid;`
			`tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);`
			`if (tmptaxon)`
			`tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx);`
			`if (tmptaxon)`
			`{`
			`for (j = 0; j < namesindex; j++)`
			`{`
			`if (tmptaxon->taxid == ranktaxonids[j]) break;`
			`}`
			`if (j < namesindex) continue; /* name is already in list, so no need to add it*/`

			`if (namesindex == nameslots)`
			`{`
			`nameslots += 500;`
			`ranktaxonids = ECOREALLOC(ranktaxonids, nameslots * sizeof(int32_t), "Cannot allocate pair rank taxon table");`
			`}`
			`ranktaxonids[namesindex] = tmptaxon->taxid;`
			`namesindex++;`
			`}`
			`}`
			`ECOFREE(ranktaxonids, "free rank taxon table");`

			`return namesindex;`
			`}`

			`void setoktaxforspecificity (ppairscount_t pairs)`
			`{`
			`uint32_t i;`
			`uint32_t j;`
			`uint32_t k;`
			`uint32_t l;`
			`int taxcount;`
			`int32_t taxid;`

			`for (i = 0; i < pairs->paircount; i++)`
			`{`
			`for (j = 0; j < pairs->pairs[i].taxsetindex; j++)`
			`{`
			`for (k = 0; k < pairs->pairs[i].taxset[j].amplifiaindex; k++)`
			`{`
			`taxid = 0;`
			`taxcount = 0;`
			`for (l = 0; l < pairs->pairs[i].ampsetindex; l++)`
			`{`
			`/*compare only char pointers because for equal strings we have same pointer in both sets*/`
			`if (pairs->pairs[i].taxset[j].amplifia[k] == pairs->pairs[i].ampset[l].amplifia)`
			`{`
			`if (pairs->pairs[i].ampset[l].seqidindex > 1)`
			`{`
			`taxcount += pairs->pairs[i].ampset[l].seqidindex;`
			`break;`
			`}`

			`if (taxid != pairs->pairs[i].ampset[l].taxonids[0])`
			`{`
			`if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];`
			`taxcount++;`
			`}`

			`if (taxcount > 1) break;`
			`}`
			`}`
			`if (taxcount == 1) pairs->pairs[i].oktaxoncount++;`
			`}`
			`}`
			`}`
			`}`

			`int main(int argc, char **argv)`
			`{`
			`pecodnadb_t seqdb; /* of type ecoseq_t */`
			`uint32_t seqdbsize=0;`
			`ecotaxonomy_t *taxonomy;`

			`options_t options;`
			`int carg;`
			`int32_t errflag=0;`

			`int32_t insamples=0;`
			`int32_t outsamples=0;`
			`uint32_t i;`

			`pwordcount_t words;`
			`pprimercount_t primers;`
			`pairscount_t pairs;`

			`int32_t rankdbstats = 0;`

			`//printcurrenttime();`
			`//return 0;`

			`initoptions(&options);`

			`while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) {`

			`switch (carg) {`
			`/* -------------------- */`
			`case 'd': /* database name */`
			`/* -------------------- */`
			`options.prefix = ECOMALLOC(strlen(optarg)+1,`
			`"Error on prefix allocation");`
			`strcpy(options.prefix,optarg);`
			`break;`

			`/* -------------------- */`
			`case 'h': /* help */`
			`/* -------------------- */`
			`PrintHelp();`
			`exit(0);`
			`break;`

			`/* ------------------------- */`
			`case 'l': /* min amplification lenght */`
			`/* ------------------------- */`
			`sscanf(optarg,"%d",&(options.lmin));`
			`break;`

			`/* -------------------------- */`
			`case 'L': /* max amplification lenght */`
			`/* -------------------------- */`
			`sscanf(optarg,"%d",&(options.lmax));`
			`break;`

			`/* -------------------- */`
			`case 'e': /* error max */`
			`/* -------------------- */`
			`sscanf(optarg,"%d",&(options.error_max));`
			`break;`


			`/* ------------------------ */`
			`case '3': /* three prime strict match */`
			`/* ------------------------ */`
			`sscanf(optarg,"%d",&(options.strict_three_prime));`
			`break;`

			`/* -------------------- */`
			`case 'q': /* strict matching quorum */`
			`/* -------------------- */`
			`sscanf(optarg,"%f",&(options.strict_quorum));`
			`break;`

			`/* -------------------- */`
			`case 's': /* strict matching quorum */`
			`/* -------------------- */`
			`sscanf(optarg,"%f",&(options.sensitivity_quorum));`
			`break;`

			`/* -------------------- */`
			`case 't': /* required taxon level for results */`
			`/* -------------------- */`
			`strncpy(options.taxonrank, optarg, 19);`
			`options.taxonrank[19] = 0;`
			`break;`

			`/* -------------------- */`
			`case 'x': /* strict matching quorum */`
			`/* -------------------- */`
			`sscanf(optarg,"%f",&(options.false_positive_quorum));`
			`break;`

			`/* ---------------------------- */`
			`case 'D': /* set in double strand mode */`
			`/* ---------------------------- */`
			`options.doublestrand=1;`
			`break;`

			`/* ---------------------------- */`
			`case 'S': /* set in single strand mode */`
			`/* ---------------------------- */`
			`options.doublestrand=0;`
			`break;`

			`/* ---------------------------- */`
			`case 'U': /* set in single strand mode */`
			`/* ---------------------------- */`
			`options.no_multi_match=TRUE;`
			`break;`

			`/* ------------------------------------------ */`
			`case 'r': /* stores the restricting search taxonomic id */`
			`/* ------------------------------------------ */`
			`options.restricted_taxid = ECOREALLOC(options.restricted_taxid,sizeof(int32_t)*(options.r+1),`
			`"Error on restricted_taxid reallocation");`
			`sscanf(optarg,"%d",&(options.restricted_taxid[options.r]));`
			`options.r++;`
			`break;`

			`/* --------------------------------- */`
			`case 'i': /* stores the taxonomic id to ignore */`
			`/* --------------------------------- */`
			`options.ignored_taxid = ECOREALLOC(options.ignored_taxid,sizeof(int32_t)*(options.g+1),`
			`"Error on excluded_taxid reallocation");`
			`sscanf(optarg,"%d",&(options.ignored_taxid[options.g]));`
			`options.g++;`
			`break;`

			`/* -------------------- */`
			`case 'c': /* sequences are circular */`
			`/* --------------------------------- */`
			`options.circular = 1;`
			`break;`

			`case '?': /* bad option */`
			`/* -------------------- */`
			`errflag++;`
			`}`

			`}`

			`fprintf(stderr,"Reading taxonomy database ...");`
			`taxonomy = read_taxonomy(options.prefix,0);`
			`fprintf(stderr,"Ok\n");`

			`setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/`

			`fprintf(stderr,"Reading sequence database ...\n");`

			`seqdb = readdnadb(options.prefix,&seqdbsize);`

			`fprintf(stderr,"Ok\n");`
			`fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);`

			`updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples);`

			`rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options);`

			`fprintf(stderr,"Database is constituted of %5d examples\n",insamples);`
			`fprintf(stderr," and %5d counterexamples\n",outsamples);`
			`fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats);`

			`fprintf(stderr,"\nIndexing words in sequences\n");`

			`printcurrenttimeinmilli();`
			`words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);`
			`printcurrenttimeinmilli();`

			`fprintf(stderr,"\n Strict primer count : %d\n",words->size);`

			`if (options.no_multi_match)`
			`{`
			`(void)filterMultiStrictPrimer(words);`
			`fprintf(stderr,"\n Strict primer with single match count : %d\n",words->size);`
			`}`


			`fprintf(stderr,"\n\n Primer sample : \n");`
			`for (i=0; i<MINI(10,words->size); i++)`
			`fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);`


			`fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");`
			`for (i=0;i<seqdbsize;i++)`
			`{`
			`encodeSequence(seqdb[i]);`
			`fprintf(stderr," Encoded sequences %5d/%5d \r",(int32_t)i+1,(int32_t)seqdbsize);`
			`}`

			`ECOFREE(words->strictcount,"Free strict primer count table");`

			`primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);`

			`ECOFREE(words->words,"Free strict primer table");`
			`ECOFREE(words,"Free strict primer structure");`
			`fprintf(stderr,"\n\n Approximate repeats :%d \n", primers->size);`

			`fprintf(stderr,"\n\n Primer sample : \n");`
			`for (i=0; i<MINI(10,primers->size); i++)`
			`fprintf(stderr," + Primer : %s example sequence count : %5d counterexample sequence count : %5d status : %s\n",ecoUnhashWord(primers->primers[i].word,options.primer_length),`
			`primers->primers[i].inexample,`
			`primers->primers[i].outexample,`
			`primers->primers[i].good ? "good":"bad");`

			`fprintf(stderr,"\n");`

			`/*TR: Added*/`
			`pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);`
			`setoktaxforspecificity (&pairs);`

			`printpairs (pairs, &options, rankdbstats, seqdbsize);`



			`ECOFREE(pairs.pairs,"Free pairs table");`

			`return 0;`
			`}`