diff --git a/src/ecoPrimer b/src/ecoPrimer index 96477a6..2976f92 100755 Binary files a/src/ecoPrimer and b/src/ecoPrimer differ diff --git a/src/ecoprimer.c b/src/ecoprimer.c index fb6b9fb..36cb480 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -91,9 +91,99 @@ void printcurrenttimeinmilli() /*TR: Added*/ -#ifdef MASKEDCODE +void printapair(int32_t index,ppair_t pair, poptions_t options) +{ + printf("%6d\t",index); + if (pair->asdirect1) + printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length)); + else + printf("%s\t",ecoUnhashWord(ecoComplementWord(pair->p1->word, + options->primer_length),options->primer_length)); + if (pair->asdirect2) + printf("%s",ecoUnhashWord(ecoComplementWord(pair->p2->word, + options->primer_length),options->primer_length)); + else + printf("%s",ecoUnhashWord(pair->p2->word,options->primer_length)); + + printf("\t%d", pair->inexample); + printf("\t%d", pair->outexample); + + printf("\t%d", pair->mind); + printf("\t%d", pair->maxd); + printf("\t%3.2f\t", (float)pair->sumd/pair->inexample); + printf("\t%4.3f\n", pair->yule); + +} + +uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options) +{ + uint32_t i,j; + float q,qfp; + float y1,y2; + + for (i=0,j=0;i < count;i++) + { + if (options->insamples) + q = (float)sortedpairs[i]->inexample/options->insamples; + else q=1.0; + + if (options->outsamples) + qfp = (float)sortedpairs[i]->outexample/options->outsamples; + else qfp=0.0; + + sortedpairs[i]->quorumin = q; + sortedpairs[i]->quorumout = qfp; + sortedpairs[i]->yule = q -qfp; + + + sortedpairs[j]=sortedpairs[i]; + + + + if (q > options->sensitivity_quorum && + qfp < options->false_positive_quorum) + j++; + } + + return j; +} + +void printpairs (ppairtree_t pairs, poptions_t options) +{ + ppair_t* sortedpairs; + ppair_t* index; + ppairlist_t pl; + int32_t i,j; + int32_t count; + + fprintf(stderr,"Total pair count : %d\n",pairs->count); + + sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs"); + index=sortedpairs; + pl=pairs->first; + j=0; + while(pl->next) + { + for (i=0;ipaircount;i++,j++) + sortedpairs[j]=pl->pairs+i; + pl=pl->next; + } + + for (i=0;ipaircount;i++,j++) + sortedpairs[j]=pl->pairs+i; + + count=filterandsortpairs(sortedpairs,pairs->count,options); + + for (i=0;i < count;i++) + printapair(i,sortedpairs[i],options); + +} + + +#ifdef MASKEDCODE void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize) + { uint32_t i; uint32_t wordsize = options->primer_length; @@ -417,6 +507,9 @@ int main(int argc, char **argv) fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize); updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples); + options.dbsize=seqdbsize; + options.insamples=insamples; + options.outsamples=outsamples; rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options); @@ -473,7 +566,7 @@ int main(int argc, char **argv) // setoktaxforspecificity (&pairs); - // printpairs (pairs, &options, rankdbstats, seqdbsize); + printpairs (pairs, &options); diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h index 0feb6cb..cfbdcc7 100644 --- a/src/libecoprimer/ecoprimer.h +++ b/src/libecoprimer/ecoprimer.h @@ -165,7 +165,9 @@ typedef struct { uint32_t mind; //< minimum distance between primers uint32_t maxd; //< maximum distance between primers uint32_t sumd; //< distance sum - + float yule; + float quorumin; + float quorumout; // // uint32_t ampsetcount; // uint32_t ampsetindex; @@ -192,6 +194,7 @@ typedef struct { ppairlist_t first; ppairlist_t last; void *tree; + int32_t count; } pairtree_t, *ppairtree_t; typedef struct { @@ -234,6 +237,9 @@ typedef struct { bool_t no_multi_match; char taxonrank[20]; //TR to count ranks against a pair int32_t taxonrankidx; //TR to count ranks against a pair + int32_t dbsize; + int32_t insamples; + int32_t outsamples; } options_t, *poptions_t; typedef ecoseq_t **pecodnadb_t; diff --git a/src/libecoprimer/pairs.c b/src/libecoprimer/pairs.c index 0af6825..838bbc7 100644 --- a/src/libecoprimer/pairs.c +++ b/src/libecoprimer/pairs.c @@ -174,87 +174,9 @@ ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t { buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options); } -// if (seqmatchcount.matchcount == 0) continue; -// -// for (j=0; j < seqmatchcount.matchcount; j++) -// { -// strt = 0; -// w1 = seqmatchcount.matches[j].word; -// /*first word should b on direct strand*/ -// if (!seqmatchcount.matches[j].strand) -// w1 = ecoComplementWord(w1, options->primer_length); -// else -// strt = options->primer_length; -// -// for (k=j+1; k < seqmatchcount.matchcount; k++) -// { -// end = 0; -// w2 = seqmatchcount.matches[k].word; -// /*second word should be on reverse strand*/ -// if (seqmatchcount.matches[k].strand) -// w2 = ecoComplementWord(w2, options->primer_length); -// else -// end = options->primer_length; -// -// if (!(seqmatchcount.matches[j].good || seqmatchcount.matches[k].good)) continue; -// if (w1 == w2) continue; -// -// d = seqmatchcount.matches[k].position - seqmatchcount.matches[j].position; -// if (d >= options->lmin && d <= options->lmax) -// { -// /*get amplified string*/ -// amplifia = getamplifia (seqdb[i], seqmatchcount.matches[j].position + strt, d - strt - end); -// -// foundindex = pairinlist(pairs, w1, w2, paircount); -// if (foundindex != -1) /*pair is found*/ -// { -// if (seqdb[i]->isexample) -// pairs[foundindex].inexample++; -// else -// pairs[foundindex].outexample++; -// -// if (pairs[foundindex].mind > d) pairs[foundindex].mind = d; -// else if (pairs[foundindex].maxd < d) pairs[foundindex].maxd = d; -// -// oldamp = addamplifiasetelem (&pairs[foundindex], amplifia, seqdb[i]->ranktaxonid); -// /*if exact same string is already in amplifia set then use that for taxon set, it will help for -// * calculating the fully identified taxons i.e specificity, we will compare pointrs instead of strings -// * because same string means same pointer*/ -// if (oldamp) -// { -// ECOFREE (amplifia, "free amplifia"); -// amplifia = oldamp; -// } -// addtaxampsetelem (&pairs[foundindex], seqdb[i]->ranktaxonid, amplifia); -// -// continue; -// } -// -// if (paircount == pairslots) -// { -// pairslots += 500; -// pairs = ECOREALLOC(pairs, pairslots * sizeof(pairs_t), "Cannot allocate pairs table"); -// } -// pairs[paircount].w1 = w1; -// pairs[paircount].w2 = w2; -// if (seqdb[i]->isexample) pairs[paircount].inexample = 1; -// else pairs[paircount].outexample = 1; -// pairs[paircount].mind = d; -// pairs[paircount].maxd = d; -// oldamp = addamplifiasetelem (&pairs[paircount], amplifia, seqdb[i]->ranktaxonid); -// addtaxampsetelem (&pairs[paircount], seqdb[i]->ranktaxonid, amplifia); -// -// paircount++; -// } -// else if (d > options->lmax) -// break; /*once if the distance is greater than lmax then it will keep on increasing*/ -// } -// } -// ECOFREE(seqmatchcount.matches, "Cannot free matches table"); -// } -// primerpairs.pairs = ECOREALLOC(pairs, paircount * sizeof(pairs_t), "Cannot allocate pairs table"); -// primerpairs.paircount = paircount; -// return primerpairs; + + return primerpairs; + } #define DMAX (2000000000) @@ -327,7 +249,7 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid, if (matchcount>1) { - fprintf(stderr,"\n====================================\n"); +// fprintf(stderr,"\n====================================\n"); sortmatch(matches,matchcount); // sort in ascending order by position @@ -425,30 +347,30 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid, pcurrent->pcr.ampcount++; - fprintf(stderr,"%c%c W1 : %s direct : %c", - "bG"[(int)pcurrent->p1->good], - "bG"[(int)pcurrent->p2->good], - ecoUnhashWord(pcurrent->p1->word, options->primer_length), - "><"[(int)pcurrent->asdirect1] - ); - - fprintf(stderr," W2 : %s direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n", - ecoUnhashWord(pcurrent->p2->word, options->primer_length), - "><"[(int)pcurrent->asdirect2], - distance, - pcurrent->mind,pcurrent->maxd, - (pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0, - pcurrent->inexample,pcurrent->outexample, - " N"[(pcurrent->outexample+pcurrent->inexample)==1], - paircount - - ); - +// fprintf(stderr,"%c%c W1 : %s direct : %c", +// "bG"[(int)pcurrent->p1->good], +// "bG"[(int)pcurrent->p2->good], +// ecoUnhashWord(pcurrent->p1->word, options->primer_length), +// "><"[(int)pcurrent->asdirect1] +// ); +// +// fprintf(stderr," W2 : %s direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n", +// ecoUnhashWord(pcurrent->p2->word, options->primer_length), +// "><"[(int)pcurrent->asdirect2], +// distance, +// pcurrent->mind,pcurrent->maxd, +// (pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0, +// pcurrent->inexample,pcurrent->outexample, +// " N"[(pcurrent->outexample+pcurrent->inexample)==1], +// paircount +// +// ); +// } } } - + pairs->count=paircount; } diff --git a/src/libecoprimer/pairtree.c b/src/libecoprimer/pairtree.c index d1c0895..7155104 100644 --- a/src/libecoprimer/pairtree.c +++ b/src/libecoprimer/pairtree.c @@ -130,5 +130,7 @@ ppairtree_t initpairtree(ppairtree_t tree) tree->last=tree->first; tree->tree=NULL; + tree->count=0; + return tree; } diff --git a/src/libecoprimer/taxstats.c b/src/libecoprimer/taxstats.c new file mode 100644 index 0000000..b08d783 --- /dev/null +++ b/src/libecoprimer/taxstats.c @@ -0,0 +1,46 @@ +/* + * taxstats.c + * + * Created on: 12 mars 2009 + * Author: coissac + */ + +#include + +static int cmptaxon(void *t1, void* t2); + + +static int cmptaxon(void *t1, void* t2) +{ + if (*t1 < *t2) + return -1; + if (*t1 > *t2) + return +1; + return 0; +} + +float taxonomycoverage(ppair_t pair, poptions_t options) +{ + int32_t seqcount; + int32_t *taxon; + int32_t i; + void **taxontree; + ppair_t *inserted; + int32_t taxoncount; + + + seqcount=pair->pcr.ampcount; + taxon = ECOMALLOC(seqcount * sizeof(int32_t)); + + taxoncount=0; + taxontree=NULL; + + for (i=0; i < seqcount; i++) + if (pair->pcr.amplifias[i].sequence->isexample) + { + inserted = tsearch(pair->pcr.amplifias[i].sequence->ranktaxonid, + taxontree, + cmptaxon); + } + +}