/* * taxstats.c * * Created on: 12 mars 2009 * Author: coissac */ #include #include "ecoprimer.h" static int cmptaxon(const void *t1, const void* t2); static int cmptaxon(const void *t1, const void* t2) { const size_t taxid1=(size_t)t1; const size_t taxid2=(size_t)t2; // fprintf(stderr,"==> counted taxid1 : %d\n",taxid1); // fprintf(stderr,"==> counted taxid2 : %d\n",taxid2); if (taxid1 < taxid2) return -1; if (taxid1 > taxid2) return +1; return 0; } int32_t counttaxon(int32_t taxid) { static void* taxontree=NULL; static int32_t taxoncount=0; // fprintf(stderr,"counted taxid : %d taxontree %p\n",taxid,taxontree); if (taxid==-1) { if (taxontree) ECOFREE(taxontree,"Free taxon tree"); taxontree=NULL; taxoncount=0; return 0; } if ((taxid > 0) && ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon)))) { tsearch((void*)((size_t)taxid),&taxontree,cmptaxon); taxoncount++; } return taxoncount; } int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, poptions_t options) { uint32_t i; ecotx_t *taxon; ecotx_t *tmptaxon; counttaxon(-1); options->intaxa = 0; for (i=0;itaxons->taxon[seqdb[i]->taxid]); seqdb[i]->isexample=isExampleTaxon(taxonomy,seqdb[i]->taxid,options); tmptaxon = eco_findtaxonatrank(taxon, options->taxonrankidx); // fprintf(stderr,"Taxid : %d %p\n",taxon->taxid,tmptaxon); if (tmptaxon) { // fprintf(stderr,"orig : %d trans : %d\n",taxon->taxid, // tmptaxon->taxid); seqdb[i]->ranktaxonid=tmptaxon->taxid; if (seqdb[i]->isexample) options->intaxa = counttaxon(tmptaxon->taxid); } else seqdb[i]->ranktaxonid=-1; } counttaxon(-1); options->outtaxa = 0; for (i=0;iranktaxonid>=0 && !seqdb[i]->isexample) options->outtaxa = counttaxon(seqdb[i]->ranktaxonid); } return options->outtaxa + options->intaxa; } float taxonomycoverage(ppair_t pair, poptions_t options) { int32_t seqcount; int32_t i; int32_t incount=0; int32_t outcount=0; seqcount=pair->pcr.ampcount; counttaxon(-1); for (i=0; i < seqcount; i++) if (pair->pcr.amplifias[i].sequence->isexample && pair->pcr.amplifias[i].sequence->ranktaxonid > 0 ) incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid); counttaxon(-1); for (i=0; i < seqcount; i++) if (!pair->pcr.amplifias[i].sequence->isexample && pair->pcr.amplifias[i].sequence->ranktaxonid) outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid); pair->intaxa=incount; pair->outtaxa=outcount; pair->bc=(float)incount/options->intaxa; return pair->bc; } static int cmpamp(const void *ampf1, const void* ampf2) { int i; int j = 0; int incr = 1; char cd1; char cd2; int chd = 0; int len = 0; pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1; pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2; if (pampf1->strand != pampf2->strand) { incr = -1; j = pampf1->length - 1; if (pampf2->strand) { pampf1 = (pamptotaxon_t) ampf2; pampf2 = (pamptotaxon_t) ampf1; chd = 1; } } len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length; for (i = 0; i < len; i++, j += incr) { cd1 = pampf1->amplifia[i]; if (incr == -1) cd2 = ecoComplementChar(pampf2->amplifia[j]); else cd2 = pampf2->amplifia[j]; if (cd1 < cd2) return chd ? 1: -1; if (cd2 < cd1) return chd ? -1: 1; } if (pampf1->length > pampf2->length) return chd ? -1: 1; if (pampf2->length > pampf1->length) return chd ? 1: -1; return 0; } void twalkaction (const void *node, VISIT order, int level) { int32_t *taxid = (int32_t*)node; //const size_t taxid=(size_t)node; //printf ("\t%d:%p, ", *taxid, node); counttaxon(*taxid); } int32_t gtxid; void twalkaction2 (const void *node, VISIT order, int level) { int32_t *pt = (int32_t *) node; gtxid = *pt; } void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize) { uint32_t i, j; uint32_t ampfindex = 0; int32_t taxid; void *ampftree = NULL; pamptotaxon_t pcurrentampf; pamptotaxon_t *ptmp; pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree"); for (i = 0; i < pair->pcr.ampcount; i++) { /*populate taxon ids tree against each unique amplifia i.e set of taxon ids for each amplifia*/ if (pair->pcr.amplifias[i].sequence->isexample) { ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia; ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand; ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length; pcurrentampf = &fwithtaxtree[ampfindex]; taxid = pair->pcr.amplifias[i].sequence->ranktaxonid; ptmp = tfind((const void*)pcurrentampf, &ftree, cmpamp); if (ptmp == NULL) { pcurrentampf = &fwithtaxtree[ampfindex]; tsearch((void*)pcurrentampf,&ftree,cmpamp); ampfindex++; } else pcurrentampf = *ptmp; if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL) { pcurrentampf->taxoncount++; tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon); } } } memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int)); counttaxon(-1); for (i = 0; i < ampfindex; i++) { if (ampfwithtaxtree[i].taxoncount > 1) twalk(ampfwithtaxtree[i].taxontree, twalkaction); //TR 5/9/10 - added code for well identified seqs else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/ { gtxid = -1; twalk(ampfwithtaxtree[i].taxontree, twalkaction2); if (gtxid != -1) { for (j = 0; j < seqdbsize; j++) if (seqdb[j]->ranktaxonid == gtxid &&(pair->p1->directCount[j] > 0 || pair->p1->reverseCount[j] > 0) && (pair->p2->directCount[j] > 0 || pair->p2->reverseCount[j] > 0)) { pair->wellIdentifiedSeqs[j] = 1; } } } } pair->notwellidentifiedtaxa = counttaxon(-2); pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa; ECOFREE (ampfwithtaxtree, "Free amplifia table"); }