first version with preliminary print function

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@186 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2009-03-12 09:55:01 +00:00
parent d911d6bd20
commit 665b22989f
6 changed files with 174 additions and 105 deletions

Binary file not shown.

View File

@ -91,9 +91,99 @@ void printcurrenttimeinmilli()
/*TR: Added*/
#ifdef MASKEDCODE
void printapair(int32_t index,ppair_t pair, poptions_t options)
{
printf("%6d\t",index);
if (pair->asdirect1)
printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length));
else
printf("%s\t",ecoUnhashWord(ecoComplementWord(pair->p1->word,
options->primer_length),options->primer_length));
if (pair->asdirect2)
printf("%s",ecoUnhashWord(ecoComplementWord(pair->p2->word,
options->primer_length),options->primer_length));
else
printf("%s",ecoUnhashWord(pair->p2->word,options->primer_length));
printf("\t%d", pair->inexample);
printf("\t%d", pair->outexample);
printf("\t%d", pair->mind);
printf("\t%d", pair->maxd);
printf("\t%3.2f\t", (float)pair->sumd/pair->inexample);
printf("\t%4.3f\n", pair->yule);
}
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
{
uint32_t i,j;
float q,qfp;
float y1,y2;
for (i=0,j=0;i < count;i++)
{
if (options->insamples)
q = (float)sortedpairs[i]->inexample/options->insamples;
else q=1.0;
if (options->outsamples)
qfp = (float)sortedpairs[i]->outexample/options->outsamples;
else qfp=0.0;
sortedpairs[i]->quorumin = q;
sortedpairs[i]->quorumout = qfp;
sortedpairs[i]->yule = q -qfp;
sortedpairs[j]=sortedpairs[i];
if (q > options->sensitivity_quorum &&
qfp < options->false_positive_quorum)
j++;
}
return j;
}
void printpairs (ppairtree_t pairs, poptions_t options)
{
ppair_t* sortedpairs;
ppair_t* index;
ppairlist_t pl;
int32_t i,j;
int32_t count;
fprintf(stderr,"Total pair count : %d\n",pairs->count);
sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs");
index=sortedpairs;
pl=pairs->first;
j=0;
while(pl->next)
{
for (i=0;i<pl->paircount;i++,j++)
sortedpairs[j]=pl->pairs+i;
pl=pl->next;
}
for (i=0;i<pl->paircount;i++,j++)
sortedpairs[j]=pl->pairs+i;
count=filterandsortpairs(sortedpairs,pairs->count,options);
for (i=0;i < count;i++)
printapair(i,sortedpairs[i],options);
}
#ifdef MASKEDCODE
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
{
uint32_t i;
uint32_t wordsize = options->primer_length;
@ -417,6 +507,9 @@ int main(int argc, char **argv)
fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);
updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples);
options.dbsize=seqdbsize;
options.insamples=insamples;
options.outsamples=outsamples;
rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options);
@ -473,7 +566,7 @@ int main(int argc, char **argv)
// setoktaxforspecificity (&pairs);
// printpairs (pairs, &options, rankdbstats, seqdbsize);
printpairs (pairs, &options);

View File

@ -165,7 +165,9 @@ typedef struct {
uint32_t mind; //< minimum distance between primers
uint32_t maxd; //< maximum distance between primers
uint32_t sumd; //< distance sum
float yule;
float quorumin;
float quorumout;
//
// uint32_t ampsetcount;
// uint32_t ampsetindex;
@ -192,6 +194,7 @@ typedef struct {
ppairlist_t first;
ppairlist_t last;
void *tree;
int32_t count;
} pairtree_t, *ppairtree_t;
typedef struct {
@ -234,6 +237,9 @@ typedef struct {
bool_t no_multi_match;
char taxonrank[20]; //TR to count ranks against a pair
int32_t taxonrankidx; //TR to count ranks against a pair
int32_t dbsize;
int32_t insamples;
int32_t outsamples;
} options_t, *poptions_t;
typedef ecoseq_t **pecodnadb_t;

View File

@ -174,87 +174,9 @@ ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t
{
buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options);
}
// if (seqmatchcount.matchcount == 0) continue;
//
// for (j=0; j < seqmatchcount.matchcount; j++)
// {
// strt = 0;
// w1 = seqmatchcount.matches[j].word;
// /*first word should b on direct strand*/
// if (!seqmatchcount.matches[j].strand)
// w1 = ecoComplementWord(w1, options->primer_length);
// else
// strt = options->primer_length;
//
// for (k=j+1; k < seqmatchcount.matchcount; k++)
// {
// end = 0;
// w2 = seqmatchcount.matches[k].word;
// /*second word should be on reverse strand*/
// if (seqmatchcount.matches[k].strand)
// w2 = ecoComplementWord(w2, options->primer_length);
// else
// end = options->primer_length;
//
// if (!(seqmatchcount.matches[j].good || seqmatchcount.matches[k].good)) continue;
// if (w1 == w2) continue;
//
// d = seqmatchcount.matches[k].position - seqmatchcount.matches[j].position;
// if (d >= options->lmin && d <= options->lmax)
// {
// /*get amplified string*/
// amplifia = getamplifia (seqdb[i], seqmatchcount.matches[j].position + strt, d - strt - end);
//
// foundindex = pairinlist(pairs, w1, w2, paircount);
// if (foundindex != -1) /*pair is found*/
// {
// if (seqdb[i]->isexample)
// pairs[foundindex].inexample++;
// else
// pairs[foundindex].outexample++;
//
// if (pairs[foundindex].mind > d) pairs[foundindex].mind = d;
// else if (pairs[foundindex].maxd < d) pairs[foundindex].maxd = d;
//
// oldamp = addamplifiasetelem (&pairs[foundindex], amplifia, seqdb[i]->ranktaxonid);
// /*if exact same string is already in amplifia set then use that for taxon set, it will help for
// * calculating the fully identified taxons i.e specificity, we will compare pointrs instead of strings
// * because same string means same pointer*/
// if (oldamp)
// {
// ECOFREE (amplifia, "free amplifia");
// amplifia = oldamp;
// }
// addtaxampsetelem (&pairs[foundindex], seqdb[i]->ranktaxonid, amplifia);
//
// continue;
// }
//
// if (paircount == pairslots)
// {
// pairslots += 500;
// pairs = ECOREALLOC(pairs, pairslots * sizeof(pairs_t), "Cannot allocate pairs table");
// }
// pairs[paircount].w1 = w1;
// pairs[paircount].w2 = w2;
// if (seqdb[i]->isexample) pairs[paircount].inexample = 1;
// else pairs[paircount].outexample = 1;
// pairs[paircount].mind = d;
// pairs[paircount].maxd = d;
// oldamp = addamplifiasetelem (&pairs[paircount], amplifia, seqdb[i]->ranktaxonid);
// addtaxampsetelem (&pairs[paircount], seqdb[i]->ranktaxonid, amplifia);
//
// paircount++;
// }
// else if (d > options->lmax)
// break; /*once if the distance is greater than lmax then it will keep on increasing*/
// }
// }
// ECOFREE(seqmatchcount.matches, "Cannot free matches table");
// }
// primerpairs.pairs = ECOREALLOC(pairs, paircount * sizeof(pairs_t), "Cannot allocate pairs table");
// primerpairs.paircount = paircount;
// return primerpairs;
return primerpairs;
}
#define DMAX (2000000000)
@ -327,7 +249,7 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
if (matchcount>1)
{
fprintf(stderr,"\n====================================\n");
// fprintf(stderr,"\n====================================\n");
sortmatch(matches,matchcount); // sort in ascending order by position
@ -425,30 +347,30 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
pcurrent->pcr.ampcount++;
fprintf(stderr,"%c%c W1 : %s direct : %c",
"bG"[(int)pcurrent->p1->good],
"bG"[(int)pcurrent->p2->good],
ecoUnhashWord(pcurrent->p1->word, options->primer_length),
"><"[(int)pcurrent->asdirect1]
);
fprintf(stderr," W2 : %s direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n",
ecoUnhashWord(pcurrent->p2->word, options->primer_length),
"><"[(int)pcurrent->asdirect2],
distance,
pcurrent->mind,pcurrent->maxd,
(pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0,
pcurrent->inexample,pcurrent->outexample,
" N"[(pcurrent->outexample+pcurrent->inexample)==1],
paircount
);
// fprintf(stderr,"%c%c W1 : %s direct : %c",
// "bG"[(int)pcurrent->p1->good],
// "bG"[(int)pcurrent->p2->good],
// ecoUnhashWord(pcurrent->p1->word, options->primer_length),
// "><"[(int)pcurrent->asdirect1]
// );
//
// fprintf(stderr," W2 : %s direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n",
// ecoUnhashWord(pcurrent->p2->word, options->primer_length),
// "><"[(int)pcurrent->asdirect2],
// distance,
// pcurrent->mind,pcurrent->maxd,
// (pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0,
// pcurrent->inexample,pcurrent->outexample,
// " N"[(pcurrent->outexample+pcurrent->inexample)==1],
// paircount
//
// );
//
}
}
}
pairs->count=paircount;
}

View File

@ -130,5 +130,7 @@ ppairtree_t initpairtree(ppairtree_t tree)
tree->last=tree->first;
tree->tree=NULL;
tree->count=0;
return tree;
}

View File

@ -0,0 +1,46 @@
/*
* taxstats.c
*
* Created on: 12 mars 2009
* Author: coissac
*/
#include <search.h>
static int cmptaxon(void *t1, void* t2);
static int cmptaxon(void *t1, void* t2)
{
if (*t1 < *t2)
return -1;
if (*t1 > *t2)
return +1;
return 0;
}
float taxonomycoverage(ppair_t pair, poptions_t options)
{
int32_t seqcount;
int32_t *taxon;
int32_t i;
void **taxontree;
ppair_t *inserted;
int32_t taxoncount;
seqcount=pair->pcr.ampcount;
taxon = ECOMALLOC(seqcount * sizeof(int32_t));
taxoncount=0;
taxontree=NULL;
for (i=0; i < seqcount; i++)
if (pair->pcr.amplifias[i].sequence->isexample)
{
inserted = tsearch(pair->pcr.amplifias[i].sequence->ranktaxonid,
taxontree,
cmptaxon);
}
}