diff --git a/src/libecoprimer/taxstats.c b/src/libecoprimer/taxstats.c index b08d783..b67823d 100644 --- a/src/libecoprimer/taxstats.c +++ b/src/libecoprimer/taxstats.c @@ -6,41 +6,118 @@ */ #include +#include "ecoprimer.h" -static int cmptaxon(void *t1, void* t2); +static int cmptaxon(const void *t1, const void* t2); - -static int cmptaxon(void *t1, void* t2) +static int cmptaxon(const void *t1, const void* t2) { - if (*t1 < *t2) + const size_t taxid1=(size_t)t1; + const size_t taxid2=(size_t)t2; + + // fprintf(stderr,"==> counted taxid1 : %d\n",taxid1); + // fprintf(stderr,"==> counted taxid2 : %d\n",taxid2); + + if (taxid1 < taxid2) return -1; - if (*t1 > *t2) + if (taxid1 > taxid2) return +1; return 0; } +int32_t counttaxon(int32_t taxid) +{ + static void* taxontree=NULL; + static int32_t taxoncount=0; + + // fprintf(stderr,"counted taxid : %d taxontree %p\n",taxid,taxontree); + + if (taxid==-1) + { + if (taxontree) + ECOFREE(taxontree,"Free taxon tree"); + taxontree=NULL; + taxoncount=0; + return 0; + } + + if ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon))) + { + tsearch((void*)((size_t)taxid),&taxontree,cmptaxon); + taxoncount++; + } + + return taxoncount; +} + +int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, + poptions_t options) +{ + + uint32_t i; + uint32_t j; + ecotx_t *taxon; + ecotx_t *tmptaxon; + + counttaxon(-1); + + for (i=0;itaxons->taxon[seqdb[i]->taxid]); + seqdb[i]->isexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options); + + tmptaxon = eco_findtaxonatrank(taxon, + options->taxonrankidx); + + // fprintf(stderr,"Taxid : %d %p\n",taxon->taxid,tmptaxon); + + if (tmptaxon) + { + // fprintf(stderr,"orig : %d trans : %d\n",taxon->taxid, + // tmptaxon->taxid); + + seqdb[i]->ranktaxonid=tmptaxon->taxid; + if (seqdb[i]->isexample) + options->intaxa = counttaxon(tmptaxon->taxid); + } + else + seqdb[i]->ranktaxonid=-1; + } + + counttaxon(-1); + + for (i=0;iranktaxonid>=0 && !seqdb[i]->isexample) + options->outtaxa = counttaxon(seqdb[i]->ranktaxonid); + } + + return options->outtaxa + options->intaxa; +} + + float taxonomycoverage(ppair_t pair, poptions_t options) { int32_t seqcount; - int32_t *taxon; int32_t i; - void **taxontree; - ppair_t *inserted; - int32_t taxoncount; + int32_t incount=0; + int32_t outcount=0; seqcount=pair->pcr.ampcount; - taxon = ECOMALLOC(seqcount * sizeof(int32_t)); - - taxoncount=0; - taxontree=NULL; + counttaxon(-1); for (i=0; i < seqcount; i++) if (pair->pcr.amplifias[i].sequence->isexample) - { - inserted = tsearch(pair->pcr.amplifias[i].sequence->ranktaxonid, - taxontree, - cmptaxon); - } + incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid); + counttaxon(-1); + for (i=0; i < seqcount; i++) + if (!pair->pcr.amplifias[i].sequence->isexample) + outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid); + + + pair->intaxa=incount; + pair->outtaxa=outcount; + return (float)incount/options->intaxa; }