First run of patch
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@203 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
146
src/ecoprimer.c
146
src/ecoprimer.c
@ -16,7 +16,10 @@
|
||||
|
||||
#define VERSION "0.1"
|
||||
/* TR: by default, statistics are made on species level*/
|
||||
#define DEFULTTAXONRANK "species"
|
||||
#define DEFAULTTAXONRANK "species"
|
||||
|
||||
static int cmpprintedpairs(const void* p1,const void* p2);
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* printout help */
|
||||
@ -69,10 +72,11 @@ static void PrintHelp()
|
||||
PP "column 8 : in taxa count\n");
|
||||
PP "column 9 : out taxa count\n");
|
||||
PP "column 10 : coverage\n");
|
||||
PP "column 11 : specificity\n");
|
||||
PP "column 12 : minimum amplified length\n");
|
||||
PP "column 13 : maximum amplified length\n");
|
||||
PP "column 14 : average amplified length\n");
|
||||
PP "column 11 : unambiguously identified taxa\n");
|
||||
PP "column 12 : specificity\n");
|
||||
PP "column 13 : minimum amplified length\n");
|
||||
PP "column 14 : maximum amplified length\n");
|
||||
PP "column 15 : average amplified length\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n");
|
||||
PP "------------------------------------------\n\n");
|
||||
@ -110,7 +114,7 @@ void initoptions(poptions_t options)
|
||||
options->r=0;
|
||||
options->g=0;
|
||||
options->no_multi_match=FALSE;
|
||||
strcpy(options->taxonrank, DEFULTTAXONRANK); /*taxon level for results, species by default*/
|
||||
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
|
||||
}
|
||||
|
||||
void printcurrenttime ()
|
||||
@ -143,7 +147,6 @@ void printcurrenttimeinmilli()
|
||||
|
||||
void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
{
|
||||
uint32_t wellidentifiedtaxa;
|
||||
|
||||
printf("%6d\t",index);
|
||||
if (pair->asdirect1)
|
||||
@ -167,17 +170,11 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
|
||||
printf("\t%d", pair->intaxa);
|
||||
printf("\t%d", pair->outtaxa);
|
||||
printf("\t%4.3f", (float)pair->intaxa/options->intaxa);
|
||||
printf("\t%4.3f", (float)pair->bc);
|
||||
|
||||
wellidentifiedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa;
|
||||
printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa);
|
||||
|
||||
//printf("\t%d", pair->notwellidentifiedtaxa);
|
||||
|
||||
//printf("\t%d", (pair->intaxa + pair->outtaxa));
|
||||
|
||||
|
||||
|
||||
printf("\t%4.3f", (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa));
|
||||
printf("\t%4.3f", pair->bs);
|
||||
|
||||
|
||||
printf("\t%d", pair->mind);
|
||||
@ -186,6 +183,25 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
|
||||
}
|
||||
|
||||
static int cmpprintedpairs(const void* p1,const void* p2)
|
||||
{
|
||||
float s1,s2;
|
||||
ppair_t pair1,pair2;
|
||||
|
||||
pair1=*((ppair_t*)p1);
|
||||
pair2=*((ppair_t*)p2);
|
||||
|
||||
s1 = pair1->yule * pair1->bs;
|
||||
s2 = pair2->yule * pair2->bs;
|
||||
|
||||
// fprintf(stderr,"s1 : %4.3f %4.3f %4.3f\n",pair1->yule , pair1->bs,s1);
|
||||
// fprintf(stderr,"s2 : %4.3f %4.3f %4.3f\n\n",pair2->yule , pair2->bs,s2);
|
||||
|
||||
if (s1 > s2) return -1;
|
||||
if (s1 < s2) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
|
||||
{
|
||||
uint32_t i,j;
|
||||
@ -201,6 +217,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
qfp = (float)sortedpairs[i]->outexample/options->outsamples;
|
||||
else qfp=0.0;
|
||||
|
||||
|
||||
sortedpairs[i]->quorumin = q;
|
||||
sortedpairs[i]->quorumout = qfp;
|
||||
sortedpairs[i]->yule = q - qfp;
|
||||
@ -215,20 +232,27 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
{
|
||||
(void)taxonomycoverage(sortedpairs[j],options);
|
||||
taxonomyspecificity(sortedpairs[j]);
|
||||
// fprintf(stderr,"%4.3f %4.3f %4.3f\n",sortedpairs[j]->yule , sortedpairs[j]->bs,sortedpairs[j]->bc);
|
||||
|
||||
j++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs);
|
||||
|
||||
return j;
|
||||
}
|
||||
|
||||
|
||||
void printpairs (ppairtree_t pairs, poptions_t options)
|
||||
{
|
||||
ppair_t* sortedpairs;
|
||||
ppair_t* index;
|
||||
ppairlist_t pl;
|
||||
size_t i,j;
|
||||
int32_t count;
|
||||
size_t count;
|
||||
|
||||
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
|
||||
|
||||
@ -256,40 +280,6 @@ void printpairs (ppairtree_t pairs, poptions_t options)
|
||||
}
|
||||
|
||||
|
||||
#ifdef MASKEDCODE
|
||||
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
|
||||
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t wordsize = options->primer_length;
|
||||
uint32_t quorumseqs;
|
||||
double sens;
|
||||
double speci;
|
||||
float avg;
|
||||
|
||||
quorumseqs = seqdbsize * 70 / 100;
|
||||
|
||||
printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);
|
||||
|
||||
for (i=0; i < pairs.paircount; i++)
|
||||
{
|
||||
if (quorumseqs > pairs.pairs[i].inexample) continue;
|
||||
sens = (pairs.pairs[i].taxsetindex*1.0)/rankdbstats*100;
|
||||
speci = (pairs.pairs[i].oktaxoncount*1.0)/pairs.pairs[i].taxsetindex*100;
|
||||
avg = (pairs.pairs[i].mind+pairs.pairs[i].maxd)*1.0/2;
|
||||
|
||||
printf("P1\t%s", ecoUnhashWord(pairs.pairs[i].w1, wordsize));
|
||||
printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[i].w2, wordsize));
|
||||
printf("\t%d", pairs.pairs[i].inexample);
|
||||
printf("\t%3.2f", sens);
|
||||
printf("\t%3.2f", speci);
|
||||
printf("\t%d", pairs.pairs[i].mind);
|
||||
printf("\t%d", pairs.pairs[i].maxd);
|
||||
printf("\t%3.2f\n", avg);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* MASKEDCODE */
|
||||
|
||||
/*updateseqparams: This function counts the insample and outsample sequences
|
||||
* and with each sequences adds a tag of the taxon to which the sequence beongs*/
|
||||
@ -340,52 +330,6 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
|
||||
}
|
||||
/* to get db stats, totals of species, genus etc....*/
|
||||
|
||||
#ifdef MASKEDCODE
|
||||
|
||||
void setoktaxforspecificity (ppairtree_t pairs)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
uint32_t k;
|
||||
uint32_t l;
|
||||
int taxcount;
|
||||
int32_t taxid;
|
||||
|
||||
for (i = 0; i < pairs->paircount; i++)
|
||||
{
|
||||
for (j = 0; j < pairs->pairs[i].taxsetindex; j++)
|
||||
{
|
||||
for (k = 0; k < pairs->pairs[i].taxset[j].amplifiaindex; k++)
|
||||
{
|
||||
taxid = 0;
|
||||
taxcount = 0;
|
||||
for (l = 0; l < pairs->pairs[i].ampsetindex; l++)
|
||||
{
|
||||
/*compare only char pointers because for equal strings we have same pointer in both sets*/
|
||||
if (pairs->pairs[i].taxset[j].amplifia[k] == pairs->pairs[i].ampset[l].amplifia)
|
||||
{
|
||||
if (pairs->pairs[i].ampset[l].seqidindex > 1)
|
||||
{
|
||||
taxcount += pairs->pairs[i].ampset[l].seqidindex;
|
||||
break;
|
||||
}
|
||||
|
||||
if (taxid != pairs->pairs[i].ampset[l].taxonids[0])
|
||||
{
|
||||
if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];
|
||||
taxcount++;
|
||||
}
|
||||
|
||||
if (taxcount > 1) break;
|
||||
}
|
||||
}
|
||||
if (taxcount == 1) pairs->pairs[i].oktaxoncount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
@ -412,7 +356,7 @@ int main(int argc, char **argv)
|
||||
|
||||
initoptions(&options);
|
||||
|
||||
while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) {
|
||||
while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* -------------------- */
|
||||
@ -514,6 +458,12 @@ int main(int argc, char **argv)
|
||||
"Error on excluded_taxid reallocation");
|
||||
sscanf(optarg,"%d",&(options.ignored_taxid[options.g]));
|
||||
options.g++;
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'O': /* set primer size */
|
||||
/* --------------------------------- */
|
||||
sscanf(optarg,"%d",&(options.primer_length));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
|
@ -170,6 +170,8 @@ typedef struct {
|
||||
float yule;
|
||||
float quorumin;
|
||||
float quorumout;
|
||||
float bs;
|
||||
float bc;
|
||||
//
|
||||
// uint32_t taxsetcount;
|
||||
// uint32_t taxsetindex;
|
||||
|
@ -108,18 +108,21 @@ float taxonomycoverage(ppair_t pair, poptions_t options)
|
||||
|
||||
counttaxon(-1);
|
||||
for (i=0; i < seqcount; i++)
|
||||
if (pair->pcr.amplifias[i].sequence->isexample)
|
||||
if (pair->pcr.amplifias[i].sequence->isexample
|
||||
&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
|
||||
incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
||||
|
||||
counttaxon(-1);
|
||||
for (i=0; i < seqcount; i++)
|
||||
if (!pair->pcr.amplifias[i].sequence->isexample)
|
||||
if (!pair->pcr.amplifias[i].sequence->isexample
|
||||
&& pair->pcr.amplifias[i].sequence->ranktaxonid)
|
||||
outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
||||
|
||||
|
||||
pair->intaxa=incount;
|
||||
pair->outtaxa=outcount;
|
||||
return (float)incount/options->intaxa;
|
||||
pair->bc=(float)incount/options->intaxa;
|
||||
return pair->bc;
|
||||
}
|
||||
|
||||
|
||||
@ -190,6 +193,8 @@ void taxonomyspecificity (ppair_t pair)
|
||||
{
|
||||
/*populate taxon ids tree against each unique amplifia
|
||||
i.e set of taxon ids for each amplifia*/
|
||||
if (pair->pcr.amplifias[i].sequence->isexample)
|
||||
{
|
||||
ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
|
||||
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
|
||||
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
|
||||
@ -211,6 +216,7 @@ void taxonomyspecificity (ppair_t pair)
|
||||
tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
counttaxon(-1);
|
||||
for (i = 0; i < ampfindex; i++)
|
||||
@ -220,5 +226,8 @@ void taxonomyspecificity (ppair_t pair)
|
||||
}
|
||||
|
||||
pair->notwellidentifiedtaxa = counttaxon(-2);
|
||||
pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
|
||||
|
||||
ECOFREE (ampfwithtaxtree, "Free amplifia table");
|
||||
|
||||
}
|
Reference in New Issue
Block a user