First run of patch
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@203 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
160
src/ecoprimer.c
160
src/ecoprimer.c
@ -16,7 +16,10 @@
|
||||
|
||||
#define VERSION "0.1"
|
||||
/* TR: by default, statistics are made on species level*/
|
||||
#define DEFULTTAXONRANK "species"
|
||||
#define DEFAULTTAXONRANK "species"
|
||||
|
||||
static int cmpprintedpairs(const void* p1,const void* p2);
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* printout help */
|
||||
@ -40,7 +43,7 @@ static void PrintHelp()
|
||||
PP " .tdx : contains information concerning the taxonomy\n");
|
||||
PP " .rdx : contains the taxonomy rank\n\n");
|
||||
PP " ecoPrimer needs all the file type. As a result, you have to write the\n");
|
||||
PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n");
|
||||
PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n");
|
||||
PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n");
|
||||
PP "-h : [H]elp - print <this> help\n\n");
|
||||
PP "-i : [I]gnore the given taxonomy id.\n\n");
|
||||
@ -69,13 +72,14 @@ static void PrintHelp()
|
||||
PP "column 8 : in taxa count\n");
|
||||
PP "column 9 : out taxa count\n");
|
||||
PP "column 10 : coverage\n");
|
||||
PP "column 11 : specificity\n");
|
||||
PP "column 12 : minimum amplified length\n");
|
||||
PP "column 13 : maximum amplified length\n");
|
||||
PP "column 14 : average amplified length\n");
|
||||
PP "column 11 : unambiguously identified taxa\n");
|
||||
PP "column 12 : specificity\n");
|
||||
PP "column 13 : minimum amplified length\n");
|
||||
PP "column 14 : maximum amplified length\n");
|
||||
PP "column 15 : average amplified length\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n");
|
||||
PP "------------------------------------------\n\n");
|
||||
PP "------------------------------------------\n\n");
|
||||
PP "\n");
|
||||
|
||||
}
|
||||
@ -110,7 +114,7 @@ void initoptions(poptions_t options)
|
||||
options->r=0;
|
||||
options->g=0;
|
||||
options->no_multi_match=FALSE;
|
||||
strcpy(options->taxonrank, DEFULTTAXONRANK); /*taxon level for results, species by default*/
|
||||
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
|
||||
}
|
||||
|
||||
void printcurrenttime ()
|
||||
@ -143,8 +147,7 @@ void printcurrenttimeinmilli()
|
||||
|
||||
void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
{
|
||||
uint32_t wellidentifiedtaxa;
|
||||
|
||||
|
||||
printf("%6d\t",index);
|
||||
if (pair->asdirect1)
|
||||
printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length));
|
||||
@ -167,17 +170,11 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
|
||||
printf("\t%d", pair->intaxa);
|
||||
printf("\t%d", pair->outtaxa);
|
||||
printf("\t%4.3f", (float)pair->intaxa/options->intaxa);
|
||||
|
||||
wellidentifiedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa;
|
||||
|
||||
//printf("\t%d", pair->notwellidentifiedtaxa);
|
||||
|
||||
//printf("\t%d", (pair->intaxa + pair->outtaxa));
|
||||
printf("\t%4.3f", (float)pair->bc);
|
||||
|
||||
printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa);
|
||||
|
||||
|
||||
printf("\t%4.3f", (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa));
|
||||
printf("\t%4.3f", pair->bs);
|
||||
|
||||
|
||||
printf("\t%d", pair->mind);
|
||||
@ -186,6 +183,25 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
|
||||
}
|
||||
|
||||
static int cmpprintedpairs(const void* p1,const void* p2)
|
||||
{
|
||||
float s1,s2;
|
||||
ppair_t pair1,pair2;
|
||||
|
||||
pair1=*((ppair_t*)p1);
|
||||
pair2=*((ppair_t*)p2);
|
||||
|
||||
s1 = pair1->yule * pair1->bs;
|
||||
s2 = pair2->yule * pair2->bs;
|
||||
|
||||
// fprintf(stderr,"s1 : %4.3f %4.3f %4.3f\n",pair1->yule , pair1->bs,s1);
|
||||
// fprintf(stderr,"s2 : %4.3f %4.3f %4.3f\n\n",pair2->yule , pair2->bs,s2);
|
||||
|
||||
if (s1 > s2) return -1;
|
||||
if (s1 < s2) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
|
||||
{
|
||||
uint32_t i,j;
|
||||
@ -201,9 +217,10 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
qfp = (float)sortedpairs[i]->outexample/options->outsamples;
|
||||
else qfp=0.0;
|
||||
|
||||
|
||||
sortedpairs[i]->quorumin = q;
|
||||
sortedpairs[i]->quorumout = qfp;
|
||||
sortedpairs[i]->yule = q -qfp;
|
||||
sortedpairs[i]->yule = q - qfp;
|
||||
|
||||
|
||||
sortedpairs[j]=sortedpairs[i];
|
||||
@ -215,23 +232,30 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
{
|
||||
(void)taxonomycoverage(sortedpairs[j],options);
|
||||
taxonomyspecificity(sortedpairs[j]);
|
||||
// fprintf(stderr,"%4.3f %4.3f %4.3f\n",sortedpairs[j]->yule , sortedpairs[j]->bs,sortedpairs[j]->bc);
|
||||
|
||||
j++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs);
|
||||
|
||||
return j;
|
||||
}
|
||||
|
||||
|
||||
void printpairs (ppairtree_t pairs, poptions_t options)
|
||||
{
|
||||
ppair_t* sortedpairs;
|
||||
ppair_t* index;
|
||||
ppairlist_t pl;
|
||||
size_t i,j;
|
||||
int32_t count;
|
||||
size_t count;
|
||||
|
||||
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
|
||||
|
||||
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
|
||||
|
||||
fprintf(stderr,"Total pair count : %d\n",pairs->count);
|
||||
|
||||
sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs");
|
||||
@ -256,40 +280,6 @@ void printpairs (ppairtree_t pairs, poptions_t options)
|
||||
}
|
||||
|
||||
|
||||
#ifdef MASKEDCODE
|
||||
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
|
||||
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t wordsize = options->primer_length;
|
||||
uint32_t quorumseqs;
|
||||
double sens;
|
||||
double speci;
|
||||
float avg;
|
||||
|
||||
quorumseqs = seqdbsize * 70 / 100;
|
||||
|
||||
printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);
|
||||
|
||||
for (i=0; i < pairs.paircount; i++)
|
||||
{
|
||||
if (quorumseqs > pairs.pairs[i].inexample) continue;
|
||||
sens = (pairs.pairs[i].taxsetindex*1.0)/rankdbstats*100;
|
||||
speci = (pairs.pairs[i].oktaxoncount*1.0)/pairs.pairs[i].taxsetindex*100;
|
||||
avg = (pairs.pairs[i].mind+pairs.pairs[i].maxd)*1.0/2;
|
||||
|
||||
printf("P1\t%s", ecoUnhashWord(pairs.pairs[i].w1, wordsize));
|
||||
printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[i].w2, wordsize));
|
||||
printf("\t%d", pairs.pairs[i].inexample);
|
||||
printf("\t%3.2f", sens);
|
||||
printf("\t%3.2f", speci);
|
||||
printf("\t%d", pairs.pairs[i].mind);
|
||||
printf("\t%d", pairs.pairs[i].maxd);
|
||||
printf("\t%3.2f\n", avg);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* MASKEDCODE */
|
||||
|
||||
/*updateseqparams: This function counts the insample and outsample sequences
|
||||
* and with each sequences adds a tag of the taxon to which the sequence beongs*/
|
||||
@ -340,52 +330,6 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
|
||||
}
|
||||
/* to get db stats, totals of species, genus etc....*/
|
||||
|
||||
#ifdef MASKEDCODE
|
||||
|
||||
void setoktaxforspecificity (ppairtree_t pairs)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
uint32_t k;
|
||||
uint32_t l;
|
||||
int taxcount;
|
||||
int32_t taxid;
|
||||
|
||||
for (i = 0; i < pairs->paircount; i++)
|
||||
{
|
||||
for (j = 0; j < pairs->pairs[i].taxsetindex; j++)
|
||||
{
|
||||
for (k = 0; k < pairs->pairs[i].taxset[j].amplifiaindex; k++)
|
||||
{
|
||||
taxid = 0;
|
||||
taxcount = 0;
|
||||
for (l = 0; l < pairs->pairs[i].ampsetindex; l++)
|
||||
{
|
||||
/*compare only char pointers because for equal strings we have same pointer in both sets*/
|
||||
if (pairs->pairs[i].taxset[j].amplifia[k] == pairs->pairs[i].ampset[l].amplifia)
|
||||
{
|
||||
if (pairs->pairs[i].ampset[l].seqidindex > 1)
|
||||
{
|
||||
taxcount += pairs->pairs[i].ampset[l].seqidindex;
|
||||
break;
|
||||
}
|
||||
|
||||
if (taxid != pairs->pairs[i].ampset[l].taxonids[0])
|
||||
{
|
||||
if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];
|
||||
taxcount++;
|
||||
}
|
||||
|
||||
if (taxcount > 1) break;
|
||||
}
|
||||
}
|
||||
if (taxcount == 1) pairs->pairs[i].oktaxoncount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
@ -412,7 +356,7 @@ int main(int argc, char **argv)
|
||||
|
||||
initoptions(&options);
|
||||
|
||||
while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) {
|
||||
while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* -------------------- */
|
||||
@ -516,6 +460,12 @@ int main(int argc, char **argv)
|
||||
options.g++;
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'O': /* set primer size */
|
||||
/* --------------------------------- */
|
||||
sscanf(optarg,"%d",&(options.primer_length));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'c': /* sequences are circular */
|
||||
/* --------------------------------- */
|
||||
@ -601,7 +551,7 @@ int main(int argc, char **argv)
|
||||
|
||||
/*TR: Added*/
|
||||
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
|
||||
|
||||
|
||||
|
||||
// setoktaxforspecificity (&pairs);
|
||||
|
||||
|
@ -104,7 +104,7 @@ typedef struct {
|
||||
* on all sequences as a list of primer_t
|
||||
*/
|
||||
typedef struct {
|
||||
pprimer_t primers;
|
||||
pprimer_t primers;
|
||||
uint32_t size;
|
||||
} primercount_t, *pprimercount_t;
|
||||
|
||||
@ -170,6 +170,8 @@ typedef struct {
|
||||
float yule;
|
||||
float quorumin;
|
||||
float quorumout;
|
||||
float bs;
|
||||
float bc;
|
||||
//
|
||||
// uint32_t taxsetcount;
|
||||
// uint32_t taxsetindex;
|
||||
@ -224,7 +226,7 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
void *amptree;
|
||||
void *amptree;
|
||||
}taxontoamp_t, *ptaxontoamp_t;
|
||||
|
||||
typedef struct {
|
||||
|
@ -40,7 +40,7 @@ int32_t counttaxon(int32_t taxid)
|
||||
taxoncount=0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if ((taxid > 0) && ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon))))
|
||||
{
|
||||
@ -108,18 +108,21 @@ float taxonomycoverage(ppair_t pair, poptions_t options)
|
||||
|
||||
counttaxon(-1);
|
||||
for (i=0; i < seqcount; i++)
|
||||
if (pair->pcr.amplifias[i].sequence->isexample)
|
||||
if (pair->pcr.amplifias[i].sequence->isexample
|
||||
&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
|
||||
incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
||||
|
||||
counttaxon(-1);
|
||||
for (i=0; i < seqcount; i++)
|
||||
if (!pair->pcr.amplifias[i].sequence->isexample)
|
||||
if (!pair->pcr.amplifias[i].sequence->isexample
|
||||
&& pair->pcr.amplifias[i].sequence->ranktaxonid)
|
||||
outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
||||
|
||||
|
||||
pair->intaxa=incount;
|
||||
pair->outtaxa=outcount;
|
||||
return (float)incount/options->intaxa;
|
||||
pair->bc=(float)incount/options->intaxa;
|
||||
return pair->bc;
|
||||
}
|
||||
|
||||
|
||||
@ -132,11 +135,11 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
char cd2;
|
||||
int chd = 0;
|
||||
int len = 0;
|
||||
|
||||
|
||||
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
|
||||
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
|
||||
|
||||
|
||||
|
||||
|
||||
if (pampf1->strand != pampf2->strand)
|
||||
{
|
||||
incr = -1;
|
||||
@ -148,9 +151,9 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
chd = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
|
||||
|
||||
|
||||
for (i = 0; i < len; i++, j += incr)
|
||||
{
|
||||
cd1 = pampf1->amplifia[i];
|
||||
@ -158,14 +161,14 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
cd2 = ecoComplementChar(pampf2->amplifia[j]);
|
||||
else
|
||||
cd2 = pampf2->amplifia[j];
|
||||
|
||||
|
||||
if (cd1 < cd2) return chd ? 1: -1;
|
||||
if (cd2 < cd1) return chd ? -1: 1;
|
||||
}
|
||||
|
||||
|
||||
if (pampf1->length > pampf2->length) return chd ? -1: 1;
|
||||
if (pampf2->length > pampf1->length) return chd ? 1: -1;
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -183,42 +186,48 @@ void taxonomyspecificity (ppair_t pair)
|
||||
void *ampftree = NULL;
|
||||
pamptotaxon_t pcurrentampf;
|
||||
pamptotaxon_t *ptmp;
|
||||
|
||||
|
||||
pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree");
|
||||
|
||||
|
||||
for (i = 0; i < pair->pcr.ampcount; i++)
|
||||
{
|
||||
/*populate taxon ids tree against each unique amplifia
|
||||
i.e set of taxon ids for each amplifia*/
|
||||
ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
|
||||
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
|
||||
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
|
||||
pcurrentampf = &fwithtaxtree[ampfindex];
|
||||
taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;
|
||||
ptmp = tfind((const void*)pcurrentampf, &ftree, cmpamp);
|
||||
if (ptmp == NULL)
|
||||
if (pair->pcr.amplifias[i].sequence->isexample)
|
||||
{
|
||||
ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
|
||||
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
|
||||
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
|
||||
pcurrentampf = &fwithtaxtree[ampfindex];
|
||||
tsearch((void*)pcurrentampf,&ftree,cmpamp);
|
||||
ampfindex++;
|
||||
}
|
||||
else
|
||||
pcurrentampf = *ptmp;
|
||||
|
||||
if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
|
||||
{
|
||||
pcurrentampf->taxoncount++;
|
||||
tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
|
||||
taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;
|
||||
ptmp = tfind((const void*)pcurrentampf, &ftree, cmpamp);
|
||||
if (ptmp == NULL)
|
||||
{
|
||||
pcurrentampf = &fwithtaxtree[ampfindex];
|
||||
tsearch((void*)pcurrentampf,&ftree,cmpamp);
|
||||
ampfindex++;
|
||||
}
|
||||
else
|
||||
pcurrentampf = *ptmp;
|
||||
|
||||
if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
|
||||
{
|
||||
pcurrentampf->taxoncount++;
|
||||
tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
counttaxon(-1);
|
||||
for (i = 0; i < ampfindex; i++)
|
||||
{
|
||||
if (ampfwithtaxtree[i].taxoncount > 1)
|
||||
twalk(ampfwithtaxtree[i].taxontree, twalkaction);
|
||||
}
|
||||
|
||||
|
||||
pair->notwellidentifiedtaxa = counttaxon(-2);
|
||||
pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
|
||||
|
||||
ECOFREE (ampfwithtaxtree, "Free amplifia table");
|
||||
}
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user