This commit is contained in:
2009-03-25 10:18:34 +00:00
parent 6726294c78
commit 75a6dac09a
4 changed files with 95 additions and 20 deletions

View File

@ -93,6 +93,8 @@ void printcurrenttimeinmilli()
void printapair(int32_t index,ppair_t pair, poptions_t options)
{
uint32_t wellidentifiedtaxa;
printf("%6d\t",index);
if (pair->asdirect1)
printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length));
@ -116,6 +118,17 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
printf("\t%d", pair->intaxa);
printf("\t%d", pair->outtaxa);
printf("\t%4.3f", (float)pair->intaxa/options->intaxa);
wellidentifiedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa;
printf("\t%d", pair->notwellidentifiedtaxa);
printf("\t%d", (pair->intaxa + pair->outtaxa));
//printf("\t%4.3f", (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa));
printf("\t%d", pair->mind);
printf("\t%d", pair->maxd);
@ -127,7 +140,6 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
{
uint32_t i,j;
float q,qfp;
float y1,y2;
for (i=0,j=0;i < count;i++)
{

View File

@ -159,7 +159,7 @@ typedef struct {
uint32_t outexample; //< counterexample sequence count
uint32_t intaxa; //< example taxa count
uint32_t outtaxa; //< counterexample taxa count
uint32_t notwellidentifiedtaxa;
// these statistics are relative to inexample sequences
@ -214,6 +214,18 @@ typedef struct {
uint32_t size;
} merge_t, *pmerge_t;
typedef struct {
const char *amplifia;
bool_t strand;
int32_t length;
void *taxontree;
int32_t taxoncount;
}amptotaxon_t, *pamptotaxon_t;
typedef struct {
int32_t taxid;
void *amptree;
}taxontoamp_t, *ptaxontoamp_t;
typedef struct {
uint32_t lmin; //**< Amplifia minimal length
@ -301,5 +313,6 @@ int32_t getrankdbstats(pecodnadb_t seqdb,
poptions_t options);
float taxonomycoverage(ppair_t pair, poptions_t options);
char ecoComplementChar(char base);
void taxonomyspecificity (ppair_t pair);
#endif /* EPSORT_H_ */

View File

@ -155,18 +155,7 @@ char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len)
ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
{
uint32_t i;
uint32_t j;
uint32_t k;
uint32_t d;
uint32_t strt;
uint32_t end;
ppairtree_t primerpairs;
primermatchcount_t seqmatchcount;
word_t w1;
word_t w2;
char *amplifia;
char *oldamp;
primerpairs = initpairtree(NULL);
@ -347,7 +336,6 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;
pcurrent->pcr.ampcount++;
// fprintf(stderr,"%c%c W1 : %s direct : %c",
// "bG"[(int)pcurrent->p1->good],
// "bG"[(int)pcurrent->p2->good],

View File

@ -25,6 +25,18 @@ static int cmptaxon(const void *t1, const void* t2)
return 0;
}
static int cmptaxamp (const void *t1, const void* t2)
{
const ptaxontoamp_t taxid1=(ptaxontoamp_t)t1;
const ptaxontoamp_t taxid2=(ptaxontoamp_t)t2;
if (taxid1->taxid < taxid2->taxid)
return -1;
if (taxid1->taxid > taxid2->taxid)
return +1;
return 0;
}
int32_t counttaxon(int32_t taxid)
{
static void* taxontree=NULL;
@ -40,8 +52,9 @@ int32_t counttaxon(int32_t taxid)
taxoncount=0;
return 0;
}
if ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon)))
if ((taxid > 0) && ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon))))
{
tsearch((void*)((size_t)taxid),&taxontree,cmptaxon);
taxoncount++;
@ -55,7 +68,6 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
{
uint32_t i;
uint32_t j;
ecotx_t *taxon;
ecotx_t *tmptaxon;
@ -133,8 +145,8 @@ static int cmpamp(const void *ampf1, const void* ampf2)
char cd2;
int chd = 0;
pamplifia_t pampf1 = (pamplifia_t) ampf1;
pamplifia_t pampf2 = (pamplifia_t) ampf2;
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
if (pampf1->strand != pampf2->strand)
@ -143,8 +155,8 @@ static int cmpamp(const void *ampf1, const void* ampf2)
j = pampf1->length - 1;
if (pampf2->strand)
{
pampf1 = (pamplifia_t) ampf2;
pampf2 = (pamplifia_t) ampf1;
pampf1 = (pamptotaxon_t) ampf2;
pampf2 = (pamptotaxon_t) ampf1;
chd = 1;
}
}
@ -163,3 +175,53 @@ static int cmpamp(const void *ampf1, const void* ampf2)
return 0;
}
void twalkaction (const void *node, VISIT order, int level)
{
const size_t taxid=(size_t)node;
if (order == preorder)
counttaxon(taxid);
}
void taxonomyspecificity (ppair_t pair)
{
uint32_t i;
int32_t j;
int32_t ampfindex = 0;
int32_t taxid;
void *ampftree = NULL;
pamptotaxon_t pcurrentampf;
pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree");
for (i = 0; i < pair->pcr.ampcount; i++)
{
/*populate taxon ids tree against each unique amplifia
i.e set of taxon ids for each amplifia*/
ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
pcurrentampf = &ampfwithtaxtree[ampfindex];
taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;
if ((pcurrentampf = tfind((void*)pcurrentampf, &ampftree, cmpamp)) == NULL)
{
pcurrentampf = &ampfwithtaxtree[ampfindex];
tsearch((void*)pcurrentampf,&ampftree,cmpamp);
ampfindex++;
}
if (tfind((void*)((size_t)taxid), &pcurrentampf->taxontree, cmptaxon) == NULL)
pcurrentampf->taxoncount++;
tsearch((void*)((size_t)taxid),&pcurrentampf->taxontree,cmptaxon);
}
counttaxon(-1);
for (j = 0; j < ampfindex; j++)
{
if (ampfwithtaxtree[j].taxoncount > 1)
twalk(ampfwithtaxtree[j].taxontree, twalkaction);
}
pair->notwellidentifiedtaxa = counttaxon(-2);
ECOFREE (ampfwithtaxtree, "Free amplifia table");
}