diff --git a/src/ecoprimer.c b/src/ecoprimer.c index ac3cda8..f3c6fd8 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -93,6 +93,8 @@ void printcurrenttimeinmilli() void printapair(int32_t index,ppair_t pair, poptions_t options) { + uint32_t wellidentifiedtaxa; + printf("%6d\t",index); if (pair->asdirect1) printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length)); @@ -116,6 +118,17 @@ void printapair(int32_t index,ppair_t pair, poptions_t options) printf("\t%d", pair->intaxa); printf("\t%d", pair->outtaxa); printf("\t%4.3f", (float)pair->intaxa/options->intaxa); + + wellidentifiedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa; + + printf("\t%d", pair->notwellidentifiedtaxa); + + printf("\t%d", (pair->intaxa + pair->outtaxa)); + + + + //printf("\t%4.3f", (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa)); + printf("\t%d", pair->mind); printf("\t%d", pair->maxd); @@ -127,7 +140,6 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti { uint32_t i,j; float q,qfp; - float y1,y2; for (i=0,j=0;i < count;i++) { diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h index 9fdac9b..5ab2c28 100644 --- a/src/libecoprimer/ecoprimer.h +++ b/src/libecoprimer/ecoprimer.h @@ -159,7 +159,7 @@ typedef struct { uint32_t outexample; //< counterexample sequence count uint32_t intaxa; //< example taxa count uint32_t outtaxa; //< counterexample taxa count - + uint32_t notwellidentifiedtaxa; // these statistics are relative to inexample sequences @@ -214,6 +214,18 @@ typedef struct { uint32_t size; } merge_t, *pmerge_t; +typedef struct { + const char *amplifia; + bool_t strand; + int32_t length; + void *taxontree; + int32_t taxoncount; +}amptotaxon_t, *pamptotaxon_t; + +typedef struct { + int32_t taxid; + void *amptree; +}taxontoamp_t, *ptaxontoamp_t; typedef struct { uint32_t lmin; //**< Amplifia minimal length @@ -301,5 +313,6 @@ int32_t getrankdbstats(pecodnadb_t seqdb, poptions_t options); float taxonomycoverage(ppair_t pair, poptions_t options); char ecoComplementChar(char base); +void taxonomyspecificity (ppair_t pair); #endif /* EPSORT_H_ */ diff --git a/src/libecoprimer/pairs.c b/src/libecoprimer/pairs.c index f48f509..88e6c21 100644 --- a/src/libecoprimer/pairs.c +++ b/src/libecoprimer/pairs.c @@ -155,18 +155,7 @@ char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len) ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options) { uint32_t i; - uint32_t j; - uint32_t k; - uint32_t d; - uint32_t strt; - uint32_t end; ppairtree_t primerpairs; - primermatchcount_t seqmatchcount; - word_t w1; - word_t w2; - char *amplifia; - char *oldamp; - primerpairs = initpairtree(NULL); @@ -347,7 +336,6 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid, pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ; pcurrent->pcr.ampcount++; - // fprintf(stderr,"%c%c W1 : %s direct : %c", // "bG"[(int)pcurrent->p1->good], // "bG"[(int)pcurrent->p2->good], diff --git a/src/libecoprimer/taxstats.c b/src/libecoprimer/taxstats.c index 60ed55c..1939592 100644 --- a/src/libecoprimer/taxstats.c +++ b/src/libecoprimer/taxstats.c @@ -25,6 +25,18 @@ static int cmptaxon(const void *t1, const void* t2) return 0; } +static int cmptaxamp (const void *t1, const void* t2) +{ + const ptaxontoamp_t taxid1=(ptaxontoamp_t)t1; + const ptaxontoamp_t taxid2=(ptaxontoamp_t)t2; + + if (taxid1->taxid < taxid2->taxid) + return -1; + if (taxid1->taxid > taxid2->taxid) + return +1; + return 0; +} + int32_t counttaxon(int32_t taxid) { static void* taxontree=NULL; @@ -40,8 +52,9 @@ int32_t counttaxon(int32_t taxid) taxoncount=0; return 0; } + - if ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon))) + if ((taxid > 0) && ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon)))) { tsearch((void*)((size_t)taxid),&taxontree,cmptaxon); taxoncount++; @@ -55,7 +68,6 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax { uint32_t i; - uint32_t j; ecotx_t *taxon; ecotx_t *tmptaxon; @@ -133,8 +145,8 @@ static int cmpamp(const void *ampf1, const void* ampf2) char cd2; int chd = 0; - pamplifia_t pampf1 = (pamplifia_t) ampf1; - pamplifia_t pampf2 = (pamplifia_t) ampf2; + pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1; + pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2; if (pampf1->strand != pampf2->strand) @@ -143,8 +155,8 @@ static int cmpamp(const void *ampf1, const void* ampf2) j = pampf1->length - 1; if (pampf2->strand) { - pampf1 = (pamplifia_t) ampf2; - pampf2 = (pamplifia_t) ampf1; + pampf1 = (pamptotaxon_t) ampf2; + pampf2 = (pamptotaxon_t) ampf1; chd = 1; } } @@ -163,3 +175,53 @@ static int cmpamp(const void *ampf1, const void* ampf2) return 0; } +void twalkaction (const void *node, VISIT order, int level) +{ + const size_t taxid=(size_t)node; + + if (order == preorder) + counttaxon(taxid); +} + +void taxonomyspecificity (ppair_t pair) +{ + uint32_t i; + int32_t j; + int32_t ampfindex = 0; + int32_t taxid; + void *ampftree = NULL; + pamptotaxon_t pcurrentampf; + + pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree"); + + for (i = 0; i < pair->pcr.ampcount; i++) + { + /*populate taxon ids tree against each unique amplifia + i.e set of taxon ids for each amplifia*/ + ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia; + ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand; + ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length; + pcurrentampf = &fwithtaxtree[ampfindex]; + taxid = pair->pcr.amplifias[i].sequence->ranktaxonid; + + if ((pcurrentampf = tfind((void*)pcurrentampf, &ftree, cmpamp)) == NULL) + { + pcurrentampf = &fwithtaxtree[ampfindex]; + tsearch((void*)pcurrentampf,&ftree,cmpamp); + ampfindex++; + } + if (tfind((void*)((size_t)taxid), &pcurrentampf->taxontree, cmptaxon) == NULL) + pcurrentampf->taxoncount++; + tsearch((void*)((size_t)taxid),&pcurrentampf->taxontree,cmptaxon); + } + + counttaxon(-1); + for (j = 0; j < ampfindex; j++) + { + if (ampfwithtaxtree[j].taxoncount > 1) + twalk(ampfwithtaxtree[j].taxontree, twalkaction); + } + + pair->notwellidentifiedtaxa = counttaxon(-2); + ECOFREE (ampfwithtaxtree, "Free amplifia table"); +} \ No newline at end of file