git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@287 60f365c0-8329-0410-b2a4-ec073aeeaa1d
264 lines
6.1 KiB
C
264 lines
6.1 KiB
C
/*
|
|
* taxstats.c
|
|
*
|
|
* Created on: 12 mars 2009
|
|
* Author: coissac
|
|
*/
|
|
|
|
#include <search.h>
|
|
#include "ecoprimer.h"
|
|
|
|
static int cmptaxon(const void *t1, const void* t2);
|
|
|
|
static int cmptaxon(const void *t1, const void* t2)
|
|
{
|
|
const size_t taxid1=(size_t)t1;
|
|
const size_t taxid2=(size_t)t2;
|
|
|
|
// fprintf(stderr,"==> counted taxid1 : %d\n",taxid1);
|
|
// fprintf(stderr,"==> counted taxid2 : %d\n",taxid2);
|
|
|
|
if (taxid1 < taxid2)
|
|
return -1;
|
|
if (taxid1 > taxid2)
|
|
return +1;
|
|
return 0;
|
|
}
|
|
|
|
int32_t counttaxon(int32_t taxid)
|
|
{
|
|
static void* taxontree=NULL;
|
|
static int32_t taxoncount=0;
|
|
|
|
// fprintf(stderr,"counted taxid : %d taxontree %p\n",taxid,taxontree);
|
|
|
|
if (taxid==-1)
|
|
{
|
|
if (taxontree)
|
|
ECOFREE(taxontree,"Free taxon tree");
|
|
taxontree=NULL;
|
|
taxoncount=0;
|
|
return 0;
|
|
}
|
|
|
|
|
|
if ((taxid > 0) && ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon))))
|
|
{
|
|
tsearch((void*)((size_t)taxid),&taxontree,cmptaxon);
|
|
taxoncount++;
|
|
}
|
|
return taxoncount;
|
|
}
|
|
|
|
int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
|
|
poptions_t options)
|
|
{
|
|
|
|
uint32_t i;
|
|
ecotx_t *taxon;
|
|
ecotx_t *tmptaxon;
|
|
|
|
counttaxon(-1);
|
|
options->intaxa = 0;
|
|
|
|
for (i=0;i<seqdbsize;i++)
|
|
{
|
|
taxon = &(taxonomy->taxons->taxon[seqdb[i]->taxid]);
|
|
seqdb[i]->isexample=isExampleTaxon(taxonomy,seqdb[i]->taxid,options);
|
|
|
|
tmptaxon = eco_findtaxonatrank(taxon,
|
|
options->taxonrankidx);
|
|
|
|
// fprintf(stderr,"Taxid : %d %p\n",taxon->taxid,tmptaxon);
|
|
|
|
if (tmptaxon)
|
|
{
|
|
// fprintf(stderr,"orig : %d trans : %d\n",taxon->taxid,
|
|
// tmptaxon->taxid);
|
|
|
|
seqdb[i]->ranktaxonid=tmptaxon->taxid;
|
|
if (seqdb[i]->isexample)
|
|
options->intaxa = counttaxon(tmptaxon->taxid);
|
|
}
|
|
else
|
|
seqdb[i]->ranktaxonid=-1;
|
|
}
|
|
|
|
counttaxon(-1);
|
|
options->outtaxa = 0;
|
|
|
|
for (i=0;i<seqdbsize;i++)
|
|
{
|
|
if (seqdb[i]->ranktaxonid>=0 && !seqdb[i]->isexample)
|
|
options->outtaxa = counttaxon(seqdb[i]->ranktaxonid);
|
|
}
|
|
|
|
return options->outtaxa + options->intaxa;
|
|
}
|
|
|
|
|
|
float taxonomycoverage(ppair_t pair, poptions_t options)
|
|
{
|
|
int32_t seqcount;
|
|
int32_t i;
|
|
int32_t incount=0;
|
|
int32_t outcount=0;
|
|
|
|
|
|
seqcount=pair->pcr.ampcount;
|
|
|
|
counttaxon(-1);
|
|
for (i=0; i < seqcount; i++)
|
|
if (pair->pcr.amplifias[i].sequence->isexample
|
|
&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
|
|
incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
|
|
|
counttaxon(-1);
|
|
for (i=0; i < seqcount; i++)
|
|
if (!pair->pcr.amplifias[i].sequence->isexample
|
|
&& pair->pcr.amplifias[i].sequence->ranktaxonid)
|
|
outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
|
|
|
|
|
pair->intaxa=incount;
|
|
pair->outtaxa=outcount;
|
|
pair->bc=(float)incount/options->intaxa;
|
|
return pair->bc;
|
|
}
|
|
|
|
|
|
static int cmpamp(const void *ampf1, const void* ampf2)
|
|
{
|
|
int i;
|
|
int j = 0;
|
|
int incr = 1;
|
|
char cd1;
|
|
char cd2;
|
|
int chd = 0;
|
|
int len = 0;
|
|
|
|
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
|
|
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
|
|
|
|
|
|
if (pampf1->strand != pampf2->strand)
|
|
{
|
|
incr = -1;
|
|
j = pampf1->length - 1;
|
|
if (pampf2->strand)
|
|
{
|
|
pampf1 = (pamptotaxon_t) ampf2;
|
|
pampf2 = (pamptotaxon_t) ampf1;
|
|
chd = 1;
|
|
}
|
|
}
|
|
|
|
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
|
|
|
|
for (i = 0; i < len; i++, j += incr)
|
|
{
|
|
cd1 = pampf1->amplifia[i];
|
|
if (incr == -1)
|
|
cd2 = ecoComplementChar(pampf2->amplifia[j]);
|
|
else
|
|
cd2 = pampf2->amplifia[j];
|
|
|
|
if (cd1 < cd2) return chd ? 1: -1;
|
|
if (cd2 < cd1) return chd ? -1: 1;
|
|
}
|
|
|
|
if (pampf1->length > pampf2->length) return chd ? -1: 1;
|
|
if (pampf2->length > pampf1->length) return chd ? 1: -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
void twalkaction (const void *node, VISIT order, int level)
|
|
{
|
|
int32_t *taxid = (int32_t*)node;
|
|
//const size_t taxid=(size_t)node;
|
|
//printf ("\t%d:%p, ", *taxid, node);
|
|
counttaxon(*taxid);
|
|
}
|
|
|
|
int32_t gtxid;
|
|
void twalkaction2 (const void *node, VISIT order, int level)
|
|
{
|
|
int32_t *pt = (int32_t *) node;
|
|
gtxid = *pt;
|
|
}
|
|
|
|
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
|
{
|
|
uint32_t i, j;
|
|
uint32_t ampfindex = 0;
|
|
int32_t taxid;
|
|
void *ampftree = NULL;
|
|
pamptotaxon_t pcurrentampf;
|
|
pamptotaxon_t *ptmp;
|
|
|
|
pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree");
|
|
|
|
for (i = 0; i < pair->pcr.ampcount; i++)
|
|
{
|
|
/*populate taxon ids tree against each unique amplifia
|
|
i.e set of taxon ids for each amplifia*/
|
|
if (pair->pcr.amplifias[i].sequence->isexample)
|
|
{
|
|
ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
|
|
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
|
|
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
|
|
pcurrentampf = &fwithtaxtree[ampfindex];
|
|
taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;
|
|
ptmp = tfind((const void*)pcurrentampf, &ftree, cmpamp);
|
|
if (ptmp == NULL)
|
|
{
|
|
pcurrentampf = &fwithtaxtree[ampfindex];
|
|
tsearch((void*)pcurrentampf,&ftree,cmpamp);
|
|
ampfindex++;
|
|
}
|
|
else
|
|
pcurrentampf = *ptmp;
|
|
|
|
if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
|
|
{
|
|
pcurrentampf->taxoncount++;
|
|
tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
|
|
}
|
|
}
|
|
}
|
|
|
|
memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int));
|
|
counttaxon(-1);
|
|
for (i = 0; i < ampfindex; i++)
|
|
{
|
|
if (ampfwithtaxtree[i].taxoncount > 1)
|
|
twalk(ampfwithtaxtree[i].taxontree, twalkaction);
|
|
//TR 5/9/10 - added code for well identified seqs
|
|
else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/
|
|
{
|
|
gtxid = -1;
|
|
twalk(ampfwithtaxtree[i].taxontree, twalkaction2);
|
|
|
|
if (gtxid != -1)
|
|
{
|
|
for (j = 0; j < seqdbsize; j++)
|
|
if (seqdb[j]->ranktaxonid == gtxid
|
|
&&(pair->p1->directCount[j] > 0
|
|
|| pair->p1->reverseCount[j] > 0)
|
|
&& (pair->p2->directCount[j] > 0
|
|
|| pair->p2->reverseCount[j] > 0))
|
|
{
|
|
pair->wellIdentifiedSeqs[j] = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pair->notwellidentifiedtaxa = counttaxon(-2);
|
|
pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
|
|
|
|
ECOFREE (ampfwithtaxtree, "Free amplifia table");
|
|
|
|
}
|