16 Commits

Author SHA1 Message Date
68c4743303 added print help
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@198 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-04-14 11:06:26 +00:00
035495b2a1 fixed twalk action
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@197 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-26 15:15:08 +00:00
3c21789533 Removed some problems for specificity
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@196 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-26 00:22:01 +00:00
75a6dac09a git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@195 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2009-03-25 10:18:34 +00:00
6726294c78 Removed compilation errors and added new implementation of ecoComplementChar
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@194 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-22 17:44:59 +00:00
584d3c406d added compare function for amplifias
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@193 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-20 11:59:55 +00:00
0440b8d761 Switch strict_three_prime option default value to 0 as a workaround bug patch
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@192 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-18 10:04:20 +00:00
db061fbb12 Patch print function for reverse primer
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@191 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-17 10:44:00 +00:00
ac52f3303d with full coverage statistic
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@190 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-15 21:53:24 +00:00
29820c1e26 with full coverage statistic
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@189 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-15 21:52:57 +00:00
5d212d5753 with full coverage statistic
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@188 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-15 21:52:29 +00:00
9908a40aaa with full coverage statistic
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@187 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-15 21:52:06 +00:00
665b22989f first version with preliminary print function
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@186 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-12 09:55:01 +00:00
d911d6bd20 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@185 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2009-03-10 08:49:11 +00:00
dffebd5826 New version of pairing algorithm (alpha)
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@184 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-09 11:05:51 +00:00
2be5f2659b algo change test for eric
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@183 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-06 07:25:02 +00:00
33 changed files with 1380 additions and 264 deletions

Binary file not shown.

View File

@ -25,9 +25,59 @@
static void PrintHelp()
{
PP "------------------------------------------\n");
PP " ecoPrimer Version %s\n", VERSION);
PP "------------------------------------------\n");
PP "------------------------------------------\n");
PP " ecoPrimer Version %s\n", VERSION);
PP "------------------------------------------\n");
PP "synopsis : finding primers and measureing the quality of primers and barcode region\n");
PP "usage: ./ecoPrimer [options] \n");
PP "------------------------------------------\n");
PP "options:\n");
PP "-d : [D]atabase : to match the expected format, the database\n");
PP " has to be formated first by the ecoPCRFormat.py program located.\n");
PP " in the ecoPCR/tools directory.\n");
PP " ecoPCRFormat.py creates three file types :\n");
PP " .sdx : contains the sequences\n");
PP " .tdx : contains information concerning the taxonomy\n");
PP " .rdx : contains the taxonomy rank\n\n");
PP " ecoPrimer needs all the file type. As a result, you have to write the\n");
PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n");
PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n");
PP "-h : [H]elp - print <this> help\n\n");
PP "-i : [I]gnore the given taxonomy id.\n\n");
PP "-l : minimum [L]ength : define the minimum amplication length. \n\n");
PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n");
PP "-r : [R]estricts the search to the given taxonomic id.\n\n");
PP "-c : Consider that the database sequences are [c]ircular\n\n");
PP "-3 : Three prime strict match\n\n");
PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n");
PP "-s : [S]ensitivity quorum\n\n");
PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n");
PP "-x : false positive quorum\n\n");
PP "-D : set in [d]ouble strand mode\n\n");
PP "-S : Set in [s]ingle strand mode\n\n");
PP "-U : No multi match\n\n");
PP "\n");
PP "------------------------------------------\n");
PP "Table result description : \n");
PP "column 1 : serial number\n");
PP "column 2 : primer1\n");
PP "column 3 : primer2\n");
PP "column 4 : good/bad\n");
PP "column 5 : in sequence count\n");
PP "column 6 : out sequence count\n");
PP "column 7 : yule\n");
PP "column 8 : in taxa count\n");
PP "column 9 : out taxa count\n");
PP "column 10 : coverage\n");
PP "column 11 : specificity\n");
PP "column 12 : minimum amplified length\n");
PP "column 13 : maximum amplified length\n");
PP "column 14 : average amplified length\n");
PP "------------------------------------------\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n");
PP "------------------------------------------\n\n");
PP "\n");
}
static void ExitUsage(int stat)
@ -56,7 +106,7 @@ void initoptions(poptions_t options)
options->strict_exclude_quorum=0.1;
options->sensitivity_quorum=0.9;
options->false_positive_quorum=0.1;
options->strict_three_prime=2;
options->strict_three_prime=0;
options->r=0;
options->g=0;
options->no_multi_match=FALSE;
@ -75,7 +125,7 @@ void printcurrenttime ()
/* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */
ts = localtime(&now);
strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts);
fprintf(stderr,"#%d#, %s\n",now, buf);
fprintf(stderr,"#%d#, %s\n",(int)now, buf);
}
void printcurrenttimeinmilli()
@ -90,7 +140,125 @@ void printcurrenttimeinmilli()
}
/*TR: Added*/
/*
 * Write one tab-separated result row for a primer pair on stdout.
 *
 * Columns, in order: index, primer 1, primer 2, the two good/bad flags,
 * example and counterexample sequence counts, yule, example and
 * counterexample taxa counts, coverage, specificity, minimum, maximum
 * and mean amplified length.
 *
 * A primer is printed as stored when its asdirect flag is set, and as
 * its complement (ecoComplementWord) otherwise.
 */
void printapair(int32_t index,ppair_t pair, poptions_t options)
{
	/* taxa seen by this pair minus those flagged as not well identified */
	uint32_t wellidentifiedtaxa = (pair->intaxa + pair->outtaxa)
	                              - pair->notwellidentifiedtaxa;

	printf("%6d\t",index);

	printf("%s\t",
	       ecoUnhashWord(pair->asdirect1
	                         ? pair->p1->word
	                         : ecoComplementWord(pair->p1->word,options->primer_length),
	                     options->primer_length));

	printf("%s",
	       ecoUnhashWord(pair->asdirect2
	                         ? pair->p2->word
	                         : ecoComplementWord(pair->p2->word,options->primer_length),
	                     options->primer_length));

	/* 'G' for a good primer, 'b' for a bad one */
	printf("\t%c%c\t%d\t%d\t%4.3f\t%d\t%d",
	       "bG"[(int)pair->p1->good],
	       "bG"[(int)pair->p2->good],
	       pair->inexample,
	       pair->outexample,
	       pair->yule,
	       pair->intaxa,
	       pair->outtaxa);

	/* coverage, then specificity */
	printf("\t%4.3f\t%4.3f",
	       (float)pair->intaxa/options->intaxa,
	       (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa));

	/* amplified length statistics: min, max, mean over example sequences */
	printf("\t%d\t%d\t%3.2f\n",
	       pair->mind,
	       pair->maxd,
	       (float)pair->sumd/pair->inexample);
}
/*
 * Compute the quorum statistics of every pair and compact, in place,
 * the pairs that pass the option filters to the front of sortedpairs.
 *
 * For each pair, quorumin is inexample/insamples (1.0 when insamples is
 * zero), quorumout is outexample/outsamples (0.0 when outsamples is
 * zero) and yule is their difference. A pair is kept when quorumin is
 * above options->sensitivity_quorum and quorumout is below
 * options->false_positive_quorum; taxonomycoverage() and
 * taxonomyspecificity() are then run on it.
 *
 * Returns the number of pairs kept. Despite the name, no sorting is
 * performed here.
 */
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
{
	uint32_t src;
	uint32_t kept = 0;
	float    q;
	float    qfp;
	ppair_t  current;

	for (src = 0; src < count; src++)
	{
		current = sortedpairs[src];

		q   = options->insamples
		          ? (float)current->inexample/options->insamples
		          : 1.0f;
		qfp = options->outsamples
		          ? (float)current->outexample/options->outsamples
		          : 0.0f;

		current->quorumin  = q;
		current->quorumout = qfp;
		current->yule      = q - qfp;

		/* always written; only becomes permanent if the pair is kept */
		sortedpairs[kept] = current;

		if (!(q > options->sensitivity_quorum &&
		      qfp < options->false_positive_quorum))
			continue;

		(void)taxonomycoverage(current,options);
		taxonomyspecificity(current);
		kept++;
	}

	return kept;
}
/*
 * Print every primer pair that passes the option filters.
 *
 * The pairs stored in the chunk list starting at pairs->first are
 * gathered into a temporary array of pointers, filtered by
 * filterandsortpairs(), and each surviving pair is written to stdout by
 * printapair(). The total pair count goes to stderr.
 *
 * The temporary array is released before returning; the pairs
 * themselves remain owned by the pair tree.
 */
void printpairs (ppairtree_t pairs, poptions_t options)
{
	ppair_t*    sortedpairs;
	ppairlist_t pl;
	size_t      total;
	size_t      i,j;
	uint32_t    count;

	//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
	fprintf(stderr,"Total pair count : %d\n",pairs->count);

	total = (pairs->count > 0) ? (size_t)pairs->count : 0;
	sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs");

	/* Flatten the chunk list; a single loop also copes with an empty list,
	 * and the bound keeps writes inside the allocated array. */
	j=0;
	for (pl=pairs->first; pl && j < total; pl=pl->next)
		for (i=0; i < pl->paircount && j < total; i++,j++)
			sortedpairs[j]=pl->pairs+i;

	/* Only hand over the slots that were actually filled. */
	count=filterandsortpairs(sortedpairs,(uint32_t)j,options);

	for (i=0;i < count;i++)
		printapair((int32_t)i,sortedpairs[i],options);

	ECOFREE(sortedpairs,"Free ordered pairs");
}
#ifdef MASKEDCODE
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
{
uint32_t i;
uint32_t wordsize = options->primer_length;
@ -98,9 +266,9 @@ void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, ui
double sens;
double speci;
float avg;
quorumseqs = seqdbsize * 70 / 100;
printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);
for (i=0; i < pairs.paircount; i++)
@ -121,9 +289,12 @@ void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, ui
}
}
#endif /* MASKEDCODE */
/*updateseqparams: This function counts the insample and outsample sequences
* and tags each sequence with the taxon to which it belongs*/
void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
poptions_t options, int32_t *insamples, int32_t *outsamples)
{
uint32_t i;
@ -131,7 +302,7 @@ void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxo
ecotx_t *tmptaxon;
for (i=0;i<seqdbsize;i++)
{
{
seqdb[i]->isexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options);
if (seqdb[i]->isexample)
(*insamples)++;
@ -139,7 +310,7 @@ void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxo
(*outsamples)++;
taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid;
tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);
tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);
if (tmptaxon)
tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx);
if (tmptaxon)
@ -154,7 +325,7 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
/*set taxon rank for which result is to be given*/
for (i = 0; i < taxonomy->ranks->count; i++)
{
if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0)
if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0)
{
options->taxonrankidx = i;
break;
@ -168,47 +339,10 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
}
}
/*
 * Count the distinct taxa, at the rank given by options->taxonrankidx,
 * that the sequences of the database map to.
 *
 * Each sequence's taxon is looked up by taxid and lifted to the
 * requested rank; sequences for which either lookup fails are ignored.
 * Taxids already met are kept in a temporary table that grows by steps
 * of 500 entries and is freed before returning.
 *
 * Returns the number of distinct taxids found.
 */
int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
poptions_t options)
{
	uint32_t capacity = 500;
	uint32_t used     = 0;
	uint32_t s;
	uint32_t k;
	int32_t  taxid;
	ecotx_t *tx;
	int32_t *seen = ECOMALLOC(capacity * sizeof(int32_t), "Error in taxon rank allocation");

	for (s = 0; s < seqdbsize; s++)
	{
		taxid = taxonomy->taxons->taxon[seqdb[s]->taxid].taxid;

		tx = eco_findtaxonbytaxid(taxonomy, taxid);
		if (tx)
			tx = eco_findtaxonatrank(tx, options->taxonrankidx);
		if (!tx)
			continue;

		/* linear scan for a taxid already recorded */
		k = 0;
		while (k < used && seen[k] != tx->taxid)
			k++;
		if (k < used)
			continue;

		if (used == capacity)
		{
			capacity += 500;
			seen = ECOREALLOC(seen, capacity * sizeof(int32_t), "Cannot allocate pair rank taxon table");
		}

		seen[used] = tx->taxid;
		used++;
	}

	ECOFREE(seen, "free rank taxon table");

	return used;
}
#ifdef MASKEDCODE
void setoktaxforspecificity (ppairscount_t pairs)
void setoktaxforspecificity (ppairtree_t pairs)
{
uint32_t i;
uint32_t j;
@ -216,7 +350,7 @@ void setoktaxforspecificity (ppairscount_t pairs)
uint32_t l;
int taxcount;
int32_t taxid;
for (i = 0; i < pairs->paircount; i++)
{
for (j = 0; j < pairs->pairs[i].taxsetindex; j++)
@ -235,13 +369,13 @@ void setoktaxforspecificity (ppairscount_t pairs)
taxcount += pairs->pairs[i].ampset[l].seqidindex;
break;
}
if (taxid != pairs->pairs[i].ampset[l].taxonids[0])
{
if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];
taxcount++;
taxcount++;
}
if (taxcount > 1) break;
}
}
@ -251,6 +385,8 @@ void setoktaxforspecificity (ppairscount_t pairs)
}
}
#endif
int main(int argc, char **argv)
{
pecodnadb_t seqdb; /* of type ecoseq_t */
@ -264,11 +400,11 @@ int main(int argc, char **argv)
int32_t insamples=0;
int32_t outsamples=0;
uint32_t i;
pwordcount_t words;
pprimercount_t primers;
pairscount_t pairs;
pwordcount_t words;
pprimercount_t primers;
ppairtree_t pairs;
int32_t rankdbstats = 0;
//printcurrenttime();
@ -290,9 +426,9 @@ int main(int argc, char **argv)
/* -------------------- */
case 'h': /* help */
/* -------------------- */
PrintHelp();
exit(0);
break;
PrintHelp();
exit(0);
break;
/* ------------------------- */
case 'l': /* min amplification lenght */
@ -337,7 +473,7 @@ int main(int argc, char **argv)
strncpy(options.taxonrank, optarg, 19);
options.taxonrank[19] = 0;
break;
/* -------------------- */
case 'x': /* strict matching quorum */
/* -------------------- */
@ -396,22 +532,27 @@ int main(int argc, char **argv)
fprintf(stderr,"Reading taxonomy database ...");
taxonomy = read_taxonomy(options.prefix,0);
fprintf(stderr,"Ok\n");
setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/
fprintf(stderr,"Reading sequence database ...\n");
seqdb = readdnadb(options.prefix,&seqdbsize);
fprintf(stderr,"Ok\n");
fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);
updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples);
options.dbsize=seqdbsize;
options.insamples=insamples;
options.outsamples=outsamples;
rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options);
fprintf(stderr,"Database is constituted of %5d examples\n",insamples);
fprintf(stderr," and %5d counterexamples\n",outsamples);
fprintf(stderr,"Database is constituted of %5d examples corresponding to %5d %s\n",insamples,
options.intaxa,options.taxonrank);
fprintf(stderr," and %5d counterexamples corresponding to %5d %s\n",outsamples,
options.outtaxa,options.taxonrank);
fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats);
fprintf(stderr,"\nIndexing words in sequences\n");
@ -419,7 +560,7 @@ int main(int argc, char **argv)
printcurrenttimeinmilli();
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
printcurrenttimeinmilli();
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
if (options.no_multi_match)
@ -460,13 +601,15 @@ int main(int argc, char **argv)
/*TR: Added*/
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
setoktaxforspecificity (&pairs);
printpairs (pairs, &options, rankdbstats, seqdbsize);
ECOFREE(pairs.pairs,"Free pairs table");
// setoktaxforspecificity (&pairs);
printpairs (pairs, &options);
//ECOFREE(pairs.pairs,"Free pairs table");
return 0;
}

15
src/libecoPCR/ecoError.P Normal file
View File

@ -0,0 +1,15 @@
ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -0,0 +1,15 @@
ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

15
src/libecoPCR/ecoMalloc.P Normal file
View File

@ -0,0 +1,15 @@
ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

5
src/libecoPCR/ecodna.P Normal file
View File

@ -0,0 +1,5 @@
ecodna.o ecodna.P : ecodna.c /usr/include/string.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h ecoPCR.h \
/usr/include/stdio.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

View File

@ -0,0 +1,5 @@
ecofilter.o ecofilter.P : ecofilter.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

15
src/libecoPCR/econame.P Normal file
View File

@ -0,0 +1,15 @@
econame.o econame.P : econame.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

15
src/libecoPCR/ecorank.P Normal file
View File

@ -0,0 +1,15 @@
ecorank.o ecorank.P : ecorank.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

19
src/libecoPCR/ecoseq.P Normal file
View File

@ -0,0 +1,19 @@
ecoseq.o ecoseq.P : ecoseq.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h /usr/include/zlib.h /usr/include/zconf.h \
/usr/include/sys/types.h /usr/include/unistd.h \
/usr/include/sys/unistd.h /usr/include/sys/select.h \
/usr/include/sys/_select.h

15
src/libecoPCR/ecotax.P Normal file
View File

@ -0,0 +1,15 @@
ecotax.o ecotax.P : ecotax.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -10,7 +10,9 @@ SOURCES = goodtaxon.c \
queue.c \
libstki.c \
sortmatch.c \
pairtree.c \
pairs.c \
taxstats.c \
apat_search.c
SRCS=$(SOURCES)

View File

@ -0,0 +1,131 @@
/*
* amplifiatree.c
*
* Created on: 7 mars 2009
* Author: coissac
*/
#include "ecoprimer.h"
#include <search.h>
static void cleanamplifia(pamplifia_t amplifia);
static void deleteamplifialist(pamplifialist_t list);
static int cmpamplifia(const void* p1,const void*p2);
/*
 * Free the amplifia array referenced by an amplifia counter, if there
 * is one. The counter structure itself is not freed.
 */
static void cleanamplifiatlist(pamplifiacount_t list)
{
	if (!list->amplifias)
		return;

	ECOFREE(list->amplifias,
	        "Free amplifia list");
}
/*
 * Release the resources attached to one amplifia entry: currently only
 * the amplifia array held in its pcr member.
 */
static void cleanamplifia(pamplifia_t amplifia)
{
	pamplifiacount_t pcr = &(amplifia->pcr);

	cleanamplifiatlist(pcr);
}
/*
 * Allocate an empty chunk able to hold `size` amplifia entries and, if
 * `parent` is not NULL, link it as parent's successor.
 *
 * The allocation is sizeof(amplifialist_t) plus size-1 extra entries --
 * presumably because amplifialist_t already ends with a one-element
 * array (TODO confirm against the type declaration).
 *
 * Returns the new chunk.
 */
static pamplifialist_t newamplifialist(pamplifialist_t parent, size_t size)
{
	size_t          bytes = sizeof(amplifialist_t)+sizeof(amplifia_t)*(size-1);
	pamplifialist_t chunk = ECOMALLOC(bytes,
	                                  "Cannot allocate new amplifia list");

	chunk->next          = NULL;
	chunk->amplifiacount = 0;
	chunk->amplifiaslots = size;

	if (parent != NULL)
		parent->next = (void*)chunk;

	return chunk;
}
/*
 * Free a chain of amplifia chunks starting at `list`.
 *
 * Successor chunks are released first (recursively), then every used
 * entry of this chunk is cleaned, and finally the chunk itself is
 * freed. A NULL list is accepted and ignored.
 */
static void deleteamplifialist(pamplifialist_t list)
{
	size_t k;

	if (!list)
		return;

	if (list->next)
	{
		deleteamplifialist(list->next);
		list->next=NULL;
	}

	k = 0;
	while (k < list->amplifiacount)
	{
		cleanamplifia((list->amplifias)+k);
		k++;
	}

	ECOFREE(list,"Delete amplifia list");
}
static int cmpamplifia(const void* p1,const void*p2)
{
pamplifia_t pr1,pr2;
pr1=(pamplifia_t)p1;
pr2=(pamplifia_t)p2;
if (pr1->p1 < pr2->p1) return -1;
if (pr1->p1 > pr2->p1) return 1;
if (pr1->asdirect1 < pr2->asdirect1) return -1;
if (pr1->asdirect1 > pr2->asdirect1) return 1;
if (pr1->p2 < pr2->p2) return -1;
if (pr1->p2 > pr2->p2) return 1;
if (pr1->asdirect2 < pr2->asdirect2) return -1;
if (pr1->asdirect2 > pr2->asdirect2) return 1;
return 0;
}
/*
 * Look an amplifia up in the tree without modifying it.
 *
 * key          : entry to look for; only the fields examined by
 *                cmpamplifia() matter.
 * amplifialist : tree to search.
 *
 * Returns the stored entry comparing equal to key, or NULL when the
 * tree is empty or holds no such entry.
 *
 * tfind() is used rather than tsearch(): tsearch() inserts the key on a
 * miss, which would leave the tree pointing at this function's by-value
 * `key` parameter after it has gone out of scope.
 */
pamplifia_t amplifiaintree (amplifia_t key,
pamplifiatree_t amplifialist)
{
	void *node;

	if (!amplifialist->tree)
		return NULL;

	node = tfind((const void *)(&key),
	             &(amplifialist->tree),
	             cmpamplifia);

	if (!node)
		return NULL;

	return *((pamplifia_t*)node);
}
/*
 * Insert an amplifia into the tree, or return the one already there.
 *
 * key  : entry to insert; it is copied into the tree's own storage.
 * list : tree to insert into (must have been initialised).
 *
 * The key is first copied into the next free slot of the last chunk and
 * that slot is offered to tsearch(). If an equal entry already exists,
 * the slot is not committed (the chunk's count is left unchanged, so it
 * will be reused) and the existing entry is returned. Otherwise the
 * slot becomes the stored entry and is returned.
 *
 * Returns NULL if tsearch() cannot allocate a tree node.
 */
pamplifia_t insertamplifia(amplifia_t key,
pamplifiatree_t list)
{
	pamplifia_t current;
	pamplifia_t found;
	void       *node;

	/* Last chunk full: chain a new one after it. newamplifialist() links
	 * the new chunk to its parent, which must be the last chunk and not
	 * the tree itself. */
	if (list->last->amplifiacount==list->last->amplifiaslots)
		list->last=newamplifialist(list->last,100);

	current = list->last->amplifias + list->last->amplifiacount;
	*current=key;

	node = tsearch((const void *)current,
	               &(list->tree),
	               cmpamplifia);
	if (!node)
		return NULL;

	found = *((pamplifia_t*)node);

	/* Commit the slot only when it is the entry now stored in the tree. */
	if (found==current)
		list->last->amplifiacount++;

	return found;
}
/*
 * Initialise an amplifia tree.
 *
 * tree : tree structure to initialise, or NULL to have one allocated.
 *
 * The tree starts with a single empty chunk of 500 slots and an empty
 * search tree.
 *
 * Returns the initialised tree (the argument, or the newly allocated
 * structure when the argument was NULL).
 */
pamplifiatree_t initamplifiatree(pamplifiatree_t tree)
{
	if (!tree)
		tree = ECOMALLOC(sizeof(amplifiatree_t),"Cannot allocate amplifia tree");

	tree->first=newamplifialist(NULL,500);
	tree->last=tree->first;
	tree->tree=NULL;

	/* the original fell off the end without returning a value */
	return tree;
}

View File

@ -0,0 +1,17 @@
apat_search.o apat_search.P : apat_search.c /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h libstki.h ecotype.h apat.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
../libecoPCR/ecoPCR.h

View File

@ -0,0 +1,17 @@
aproxpattern.o aproxpattern.P : aproxpattern.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -61,7 +61,7 @@ void encodeSequence(ecoseq_t *seq)
for (i=0;i<seq->SQ_length;i++,data++,cseq++)
{
*data = encoder[(IS_UPPER(*cseq) ? *cseq - 'A' : 'Z')];
*data = encoder[(IS_UPPER(*cseq) ? *cseq : 'Z') - 'A'];
}
}
@ -82,7 +82,7 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3
uint32_t inSequenceQuorum;
uint32_t outSequenceQuorum;
bool_t conserved = TRUE;
//poslist_t ttt;

View File

@ -79,28 +79,39 @@ typedef union {
uint32_t value;
} poslist_t, *ppostlist_t;
typedef struct {
word_t word;
uint32_t *directCount;
ppostlist_t directPos;
/**
* primer_t structure store fuzzy match positions for a primer
* on all sequences
*/
uint32_t *reverseCount;
ppostlist_t reversePos;
bool_t good;
uint32_t inexample;
uint32_t outexample;
typedef struct {
word_t word; //< code for the primer
uint32_t *directCount; //< Occurrence count on direct strand
ppostlist_t directPos; //< list of position list on direct strand
uint32_t *reverseCount; //< Occurrence count on reverse strand
ppostlist_t reversePos; //< list of position list on reverse strand
bool_t good; //< primer match more than quorum example and no
// more counterexample quorum.
uint32_t inexample; //< count of example sequences matching primer
uint32_t outexample; //< count of counterexample sequences matching primer
} primer_t, *pprimer_t;
/**
* primercount_t structure store fuzzy match positions for all primers
* on all sequences as a list of primer_t
*/
typedef struct {
pprimer_t primers;
pprimer_t primers;
uint32_t size;
} primercount_t, *pprimercount_t;
typedef struct {
word_t word;
pprimer_t primer;
uint32_t position;
bool_t strand;
bool_t good; /*TR: Added*/
} primermatch_t, *pprimermatch_t;
/*TR: Added*/
@ -109,6 +120,19 @@ typedef struct {
uint32_t matchcount;
} primermatchcount_t, *pprimermatchcount_t;
typedef struct {
pecoseq_t sequence;
bool_t strand;
const char *amplifia;
int32_t length;
} amplifia_t, *pamplifia_t;
typedef struct {
pamplifia_t amplifias;
uint32_t ampcount;
uint32_t ampslot;
} amplifiacount_t, *pamplifiacount_t;
typedef struct {
char *amplifia;
int32_t *taxonids;
@ -124,30 +148,52 @@ typedef struct {
} taxampset_t, *ptaxampset_t;
typedef struct {
word_t w1;
word_t w2;
uint32_t inexample; /*inexample count*/
uint32_t outexample; /*outexample count*/
uint32_t mind;
uint32_t maxd;
uint32_t ampsetcount;
uint32_t ampsetindex;
pampseqset_t ampset;
uint32_t taxsetcount;
uint32_t taxsetindex;
ptaxampset_t taxset;
uint32_t oktaxoncount;
} pairs_t, *ppairs_t;
pprimer_t p1;
bool_t asdirect1;
pprimer_t p2;
bool_t asdirect2;
amplifiacount_t pcr;
uint32_t inexample; //< example sequence count
uint32_t outexample; //< counterexample sequence count
uint32_t intaxa; //< example taxa count
uint32_t outtaxa; //< counterexample taxa count
uint32_t notwellidentifiedtaxa;
// these statistics are relative to inexample sequences
uint32_t mind; //< minimum distance between primers
uint32_t maxd; //< maximum distance between primers
uint32_t sumd; //< distance sum
float yule;
float quorumin;
float quorumout;
//
// uint32_t taxsetcount;
// uint32_t taxsetindex;
// ptaxampset_t taxset;
//
// uint32_t oktaxoncount;
} pair_t, *ppair_t;
/*TR: Added*/
typedef struct {
ppairs_t pairs;
uint32_t paircount;
}pairscount_t, *ppairscount_t;
size_t paircount;
size_t pairslots;
void* next;
pair_t pairs[1];
} pairlist_t, *ppairlist_t;
/**
 * pairtree_t structure: container for all primer pairs, stored as a
 * linked list of pairlist_t chunks
 */
typedef struct {
// first chunk of the list; printpairs walks it through the next links
ppairlist_t first;
// presumably the chunk currently being filled - TODO confirm in pairtree.c
ppairlist_t last;
// presumably the tsearch() root used to find existing pairs - TODO confirm
void *tree;
// total pair count; printpairs reports it and uses it to size its pointer table
int32_t count;
} pairtree_t, *ppairtree_t;
typedef struct {
pword_t words;
@ -168,6 +214,18 @@ typedef struct {
uint32_t size;
} merge_t, *pmerge_t;
typedef struct {
const char *amplifia;
bool_t strand;
int32_t length;
int32_t taxoncount;
void *taxontree;
}amptotaxon_t, *pamptotaxon_t;
typedef struct {
int32_t taxid;
void *amptree;
}taxontoamp_t, *ptaxontoamp_t;
typedef struct {
uint32_t lmin; //**< Amplifia minimal length
@ -189,6 +247,14 @@ typedef struct {
bool_t no_multi_match;
char taxonrank[20]; //TR to count ranks against a pair
int32_t taxonrankidx; //TR to count ranks against a pair
// Some statistics useful for options filters
int32_t dbsize;
int32_t insamples;
int32_t outsamples;
int32_t intaxa;
int32_t outtaxa;
} options_t, *poptions_t;
typedef ecoseq_t **pecodnadb_t;
@ -232,7 +298,21 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3
void sortmatch(pprimermatch_t table,uint32_t N);
ppairtree_t initpairtree(ppairtree_t tree);
ppair_t pairintree (pair_t key,ppairtree_t pairlist);
ppair_t insertpair(pair_t key,ppairtree_t list);
/*TR: Added*/
pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options);
ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options);
int32_t counttaxon(int32_t taxid);
int32_t getrankdbstats(pecodnadb_t seqdb,
uint32_t seqdbsize,
ecotaxonomy_t *taxonomy,
poptions_t options);
float taxonomycoverage(ppair_t pair, poptions_t options);
char ecoComplementChar(char base);
void taxonomyspecificity (ppair_t pair);
#endif /* EPSORT_H_ */

View File

@ -0,0 +1,17 @@
goodtaxon.o goodtaxon.P : goodtaxon.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -0,0 +1,17 @@
hashsequence.o hashsequence.P : hashsequence.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -201,3 +201,8 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word)
return ~0;
}
/**
 * Complement a nucleotide expressed as a 2-bit code.
 *
 * Codes 0..3 are mapped to their bitwise complement restricted to two
 * bits (0 <-> 3, 1 <-> 2). Any other value is returned as 4.
 *
 * The previous expression used the logical NOT (`!base & 3`), which can
 * only yield 0 or 1; the `& 3` mask shows a bitwise complement was meant.
 * Values are examined as unsigned char, so a negative char is treated as
 * out of range (result 4) instead of being complemented.
 *
 * NOTE(review): presumably this is the same 2-bit encoding as the one used
 * for hashed words - confirm against the encoder.
 *
 * @param base  nucleotide code
 * @return      complemented code in 0..3, or 4 for any other input
 */
char ecoComplementChar(char base)
{
	const unsigned char code = (unsigned char)base;

	if (code >= 4)
		return 4;

	return (char)(~code & 3);
}

View File

@ -0,0 +1,17 @@
libstki.o libstki.P : libstki.c /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h libstki.h ecotype.h ecoprimer.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
../libecoPCR/ecoPCR.h apat.h debug.h

17
src/libecoprimer/merge.P Normal file
View File

@ -0,0 +1,17 @@
merge.o merge.P : merge.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

17
src/libecoprimer/pairs.P Normal file
View File

@ -0,0 +1,17 @@
pairs.o pairs.P : pairs.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/string.h

View File

@ -7,34 +7,40 @@
#include "ecoprimer.h"
#include <string.h>
#include <stdlib.h>
primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options);
static void buildPrimerPairsForOneSeq(uint32_t seqid,
pecodnadb_t seqdb,
pprimercount_t primers,
ppairtree_t pairs,
poptions_t options);
int32_t pairinlist (ppairs_t pairlist, word_t w1, word_t w2, uint32_t size)
{
uint32_t i;
for (i = 0; i < size; i++)
{
if (w1 == pairlist[i].w1 && w2 == pairlist[i].w2) return i;
if (w1 == pairlist[i].w2 && w2 == pairlist[i].w1) return i;
}
return -1;
}
char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid)
/*************************************
*
* pair collection management
*
*************************************/
#ifdef MASKEDCODE
char *addamplifiasetelem (ppair_t pair, char* amplifia, int32_t taxid)
{
uint32_t i;
uint32_t j;
char *ampused = NULL;
if(pair->ampsetcount == 0)
{
pair->ampsetcount = 500;
pair->ampsetindex = 0;
pair->ampset = ECOMALLOC(pair->ampsetcount * sizeof(ampseqset_t),"Cannot allocate amplifia set");
}
for (i = 0; i < pair->ampsetindex; i++)
{
if (strcmp (pair->ampset[i].amplifia, amplifia) == 0)
@ -43,43 +49,43 @@ char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid)
break;
}
}
if (i == 0)
{
pair->ampset[i].seqidcount = 100;
pair->ampset[i].seqidindex = 0;
pair->ampset[i].taxonids = ECOMALLOC(pair->ampset[i].seqidcount * sizeof(uint32_t),"Cannot allocate amplifia sequence table");
}
if (pair->ampsetindex == pair->ampsetcount)
{
pair->ampsetcount += 500;
pair->ampset = ECOREALLOC(pair->ampset, pair->ampsetcount * sizeof(ampseqset_t), "Cannot allocate amplifia set");
}
if (pair->ampset[i].seqidindex == pair->ampset[i].seqidcount)
{
pair->ampset[i].seqidcount += 100;
pair->ampset[i].taxonids = ECOREALLOC(pair->ampset[i].taxonids, pair->ampset[i].seqidcount * sizeof(int32_t), "Cannot allocate amplifia sequence table");
}
if (pair->ampset[i].amplifia == NULL)
{
pair->ampset[i].amplifia = amplifia;
pair->ampsetindex++;
}
for (j = 0; j < pair->ampset[i].seqidindex; j++)
{
if (pair->ampset[i].taxonids[j] == taxid) break;
}
if (j == pair->ampset[i].seqidindex)
pair->ampset[i].taxonids[pair->ampset[i].seqidindex++] = taxid;
return ampused;
}
void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
void addtaxampsetelem (ppair_t pair, int32_t taxid, char *amplifia)
{
uint32_t i;
uint32_t j;
@ -90,42 +96,42 @@ void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
pair->taxsetindex = 0;
pair->taxset = ECOMALLOC(pair->taxsetcount * sizeof(taxampset_t),"Cannot allocate taxon set");
}
for (i = 0; i < pair->taxsetindex; i++)
{
if (pair->taxset[i].taxonid == taxid) break;
}
if (i == 0)
{
pair->taxset[i].amplifiacount = 100;
pair->taxset[i].amplifiaindex = 0;
pair->taxset[i].amplifia = ECOMALLOC(pair->taxset[i].amplifiacount * sizeof(char *),"Cannot allocate amplifia table");
}
if (pair->taxsetindex == pair->taxsetcount)
{
pair->taxsetcount += 500;
pair->taxset = ECOREALLOC(pair->taxset, pair->taxsetcount * sizeof(taxampset_t), "Cannot allocate taxon set");
}
if (pair->taxset[i].amplifiaindex == pair->taxset[i].amplifiacount)
{
pair->taxset[i].amplifiacount += 100;
pair->taxset[i].amplifia = ECOREALLOC(pair->taxset[i].amplifia, pair->taxset[i].amplifiacount * sizeof(char *), "Cannot allocate amplifia table");
}
if (pair->taxset[i].taxonid == 0)
{
pair->taxset[i].taxonid = taxid;
pair->taxsetindex++;
}
for (j = 0; j < pair->taxset[i].amplifiaindex; j++)
{
if (strcmp(pair->taxset[i].amplifia[j], amplifia) == 0) break;
}
if (j == pair->taxset[i].amplifiaindex)
{
pair->taxset[i].amplifia[j] = amplifia;
@ -135,140 +141,62 @@ void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
/**
 * Extract a copy of a sub-sequence.
 *
 * Copies at most `len` characters of seq->SQ starting at offset `start`
 * into a freshly allocated buffer of len + 1 bytes. The buffer is always
 * NUL-terminated explicitly: strncpy() does not write a terminator when
 * the source holds `len` or more characters, so the result no longer
 * depends on the allocator returning zeroed memory.
 *
 * @param seq    sequence to read from
 * @param start  offset of the first character to copy
 * @param len    number of characters to copy
 * @return       buffer allocated with ECOMALLOC; the caller releases it
 */
char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len)
{
	char *amplifia;
	char *seqc;

	/* both values are unsigned: print them with a matching conversion */
	fprintf(stderr,"start : %u length : %u\n",(unsigned int)start,(unsigned int)len);

	amplifia = ECOMALLOC((len + 1) * sizeof(char),"Cannot allocate amplifia");
	seqc = &seq->SQ[start];

	strncpy(amplifia, seqc, len);
	amplifia[len] = '\0';

	return amplifia;
}
#endif
/*TR: Added*/
pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
{
uint32_t i;
uint32_t j;
uint32_t k;
uint32_t d;
uint32_t strt;
uint32_t end;
uint32_t paircount = 0;
uint32_t pairslots = 500;
int32_t foundindex;
ppairs_t pairs;
pairscount_t primerpairs;
primermatchcount_t seqmatchcount;
word_t w1;
word_t w2;
char *amplifia;
char *oldamp;
ppairtree_t primerpairs;
pairs = ECOMALLOC(pairslots * sizeof(pairs_t),"Cannot allocate pairs table");
primerpairs = initpairtree(NULL);
for (i=0; i < seqdbsize; i++)
{
seqmatchcount = buildPrimerPairsForOneSeq(i, primers, options);
if (seqmatchcount.matchcount == 0) continue;
for (j=0; j < seqmatchcount.matchcount; j++)
{
strt = 0;
w1 = seqmatchcount.matches[j].word;
/*first word should b on direct strand*/
if (!seqmatchcount.matches[j].strand)
w1 = ecoComplementWord(w1, options->primer_length);
else
strt = options->primer_length;
for (k=j+1; k < seqmatchcount.matchcount; k++)
{
end = 0;
w2 = seqmatchcount.matches[k].word;
/*second word should be on reverse strand*/
if (seqmatchcount.matches[k].strand)
w2 = ecoComplementWord(w2, options->primer_length);
else
end = options->primer_length;
if (!(seqmatchcount.matches[j].good || seqmatchcount.matches[k].good)) continue;
if (w1 == w2) continue;
d = seqmatchcount.matches[k].position - seqmatchcount.matches[j].position;
if (d >= options->lmin && d <= options->lmax)
{
/*get amplified string*/
amplifia = getamplifia (seqdb[i], seqmatchcount.matches[j].position + strt, d - strt - end);
foundindex = pairinlist(pairs, w1, w2, paircount);
if (foundindex != -1) /*pair is found*/
{
if (seqdb[i]->isexample)
pairs[foundindex].inexample++;
else
pairs[foundindex].outexample++;
if (pairs[foundindex].mind > d) pairs[foundindex].mind = d;
else if (pairs[foundindex].maxd < d) pairs[foundindex].maxd = d;
oldamp = addamplifiasetelem (&pairs[foundindex], amplifia, seqdb[i]->ranktaxonid);
/*if exact same string is already in amplifia set then use that for taxon set, it will help for
* calculating the fully identified taxons i.e specificity, we will compare pointrs instead of strings
* because same string means same pointer*/
if (oldamp)
{
ECOFREE (amplifia, "free amplifia");
amplifia = oldamp;
}
addtaxampsetelem (&pairs[foundindex], seqdb[i]->ranktaxonid, amplifia);
continue;
}
if (paircount == pairslots)
{
pairslots += 500;
pairs = ECOREALLOC(pairs, pairslots * sizeof(pairs_t), "Cannot allocate pairs table");
}
pairs[paircount].w1 = w1;
pairs[paircount].w2 = w2;
if (seqdb[i]->isexample) pairs[paircount].inexample = 1;
else pairs[paircount].outexample = 1;
pairs[paircount].mind = d;
pairs[paircount].maxd = d;
oldamp = addamplifiasetelem (&pairs[paircount], amplifia, seqdb[i]->ranktaxonid);
addtaxampsetelem (&pairs[paircount], seqdb[i]->ranktaxonid, amplifia);
paircount++;
}
else if (d > options->lmax)
break; /*once if the distance is greater than lmax then it will keep on increasing*/
}
}
ECOFREE(seqmatchcount.matches, "Cannot free matches table");
buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options);
}
primerpairs.pairs = ECOREALLOC(pairs, paircount * sizeof(pairs_t), "Cannot allocate pairs table");
primerpairs.paircount = paircount;
return primerpairs;
}
primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options)
{
uint32_t i,j,k;
uint32_t matchcount=0;
pprimermatch_t matches = NULL;
primermatchcount_t seqmatchcount;
#define DMAX (2000000000)
static void buildPrimerPairsForOneSeq(uint32_t seqid,
pecodnadb_t seqdb,
pprimercount_t primers,
ppairtree_t pairs,
poptions_t options)
{
static uint32_t paircount=0;
uint32_t i,j,k;
uint32_t matchcount=0;
pprimermatch_t matches = NULL;
primermatchcount_t seqmatchcount;
ppair_t pcurrent;
pair_t current;
pprimer_t wswp;
bool_t bswp;
size_t distance;
bool_t strand;
seqmatchcount.matchcount = 0;
seqmatchcount.matches = NULL;
for (i=0;i < primers->size; i++)
{
matchcount+=primers->primers[i].directCount[seqid];
matchcount+=primers->primers[i].reverseCount[seqid];
}
if (matchcount <= 0) return seqmatchcount;
if (matchcount <= 0)
return;
matches = ECOMALLOC(matchcount * sizeof(primermatch_t),"Cannot allocate primers match table");
for (i=0,j=0;i < primers->size; i++)
@ -277,17 +205,15 @@ primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t prime
{
if (primers->primers[i].directCount[seqid]==1)
{
matches[j].word = primers->primers[i].word;
matches[j].primer = primers->primers+i;
matches[j].strand=TRUE;
matches[j].good=primers->primers[i].good;/*TR: Added*/
matches[j].position=primers->primers[i].directPos[seqid].value;
j++;
}
else for (k=0; k < primers->primers[i].directCount[seqid]; k++,j++)
{
matches[j].word = primers->primers[i].word;
matches[j].primer = primers->primers+i;
matches[j].strand=TRUE;
matches[j].good=primers->primers[i].good;/*TR: Added*/
matches[j].position=primers->primers[i].directPos[seqid].pointer[k];
}
}
@ -296,26 +222,144 @@ primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t prime
{
if (primers->primers[i].reverseCount[seqid]==1)
{
matches[j].word = primers->primers[i].word;
matches[j].primer = primers->primers+i;
matches[j].strand=FALSE;
matches[j].good=primers->primers[i].good;/*TR: Added*/
matches[j].position=primers->primers[i].reversePos[seqid].value;
j++;
}
else for (k=0; k < primers->primers[i].reverseCount[seqid]; k++,j++)
{
matches[j].word = primers->primers[i].word;
matches[j].primer = primers->primers+i;
matches[j].strand=FALSE;
matches[j].good=primers->primers[i].good;/*TR: Added*/
matches[j].position=primers->primers[i].reversePos[seqid].pointer[k];
}
}
}
sortmatch(matches,matchcount); // sort in asscending order by position
/*TR: Added*/
seqmatchcount.matches = matches;
seqmatchcount.matchcount = matchcount;
return seqmatchcount;
if (matchcount>1)
{
// fprintf(stderr,"\n====================================\n");
sortmatch(matches,matchcount); // sort in ascending order by position
for (i=0; i < matchcount;i++)
{
// For all primers matching the sequence
for(j=i+1;
(j<matchcount)
&& ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
j++
)
// For all not too far primers
if ( (matches[i].primer->good || matches[j].primer->good)
&& (distance > options->lmin)
)
{
// If possible primer pair
current.p1 = matches[i].primer;
current.asdirect1=matches[i].strand;
current.p2 = matches[j].primer;
current.asdirect2= !matches[j].strand;
current.maxd=DMAX;
current.mind=DMAX;
current.sumd=0;
current.inexample=0;
current.outexample=0;
// Standardize the pair
strand = current.p2->word > current.p1->word;
if (!strand)
{
wswp = current.p1;
current.p1=current.p2;
current.p2=wswp;
bswp = current.asdirect1;
current.asdirect1=current.asdirect2;
current.asdirect2=bswp;
}
// Look for the new pair in already seen pairs
pcurrent = insertpair(current,pairs);
if (seqdb[seqid]->isexample)
{
pcurrent->inexample++;
pcurrent->sumd+=distance;
if ((pcurrent->maxd==DMAX) || (distance > pcurrent->maxd))
pcurrent->maxd = distance;
if (distance < pcurrent->mind)
pcurrent->mind = distance;
}
else
pcurrent->outexample++;
if ((pcurrent->outexample+pcurrent->inexample)==1)
{
paircount++;
pcurrent->pcr.ampslot=200;
pcurrent->pcr.ampcount=0;
pcurrent->pcr.amplifias = ECOMALLOC(sizeof(amplifia_t)*pcurrent->pcr.ampslot,
"Cannot allocate amplifia table");
}
else
{
if (pcurrent->pcr.ampslot==pcurrent->pcr.ampcount)
{
pcurrent->pcr.ampslot+=200;
pcurrent->pcr.amplifias = ECOREALLOC(pcurrent->pcr.amplifias,
sizeof(amplifia_t)*pcurrent->pcr.ampslot,
"Cannot allocate amplifia table");
}
}
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].length=distance;
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].sequence=seqdb[seqid];
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].strand=strand;
if (strand)
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[i].position + options->primer_length;
else
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;
pcurrent->pcr.ampcount++;
// fprintf(stderr,"%c%c W1 : %s direct : %c",
// "bG"[(int)pcurrent->p1->good],
// "bG"[(int)pcurrent->p2->good],
// ecoUnhashWord(pcurrent->p1->word, options->primer_length),
// "><"[(int)pcurrent->asdirect1]
// );
//
// fprintf(stderr," W2 : %s direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n",
// ecoUnhashWord(pcurrent->p2->word, options->primer_length),
// "><"[(int)pcurrent->asdirect2],
// distance,
// pcurrent->mind,pcurrent->maxd,
// (pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0,
// pcurrent->inexample,pcurrent->outexample,
// " N"[(pcurrent->outexample+pcurrent->inexample)==1],
// paircount
//
// );
//
}
}
}
pairs->count=paircount;
}

View File

@ -0,0 +1,17 @@
pairtree.o pairtree.P : pairtree.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/search.h

136
src/libecoprimer/pairtree.c Normal file
View File

@ -0,0 +1,136 @@
/*
* pairtree.c
*
* Created on: 7 mars 2009
* Author: coissac
*/
#include "ecoprimer.h"
#include <search.h>
static void cleanpair(ppair_t pair);
static void deletepairlist(ppairlist_t list);
static int cmppair(const void* p1,const void*p2);
/* Release the amplifia table owned by an amplifia counter, if there is one. */
static void cleanamplifiatlist(pamplifiacount_t list)
{
	if (!list->amplifias)
		return;

	ECOFREE(list->amplifias,"Free amplifia list");
}
/* Release the resources attached to a pair: its PCR amplifia table. */
static void cleanpair(ppair_t pair)
{
	cleanamplifiatlist(&pair->pcr);
}
/*
 * Allocate an empty chunk able to hold `size` pairs.
 *
 * pairlist_t already embeds one pair_t, hence the (size - 1) extra slots.
 * When `parent` is given, the new chunk is linked as its successor.
 * Returns the new chunk.
 */
static ppairlist_t newpairlist(ppairlist_t parent, size_t size)
{
	const size_t bytes = sizeof(pairlist_t) + sizeof(pair_t) * (size - 1);
	ppairlist_t chunk = ECOMALLOC(bytes,"Cannot allocate new pair list");

	chunk->next = NULL;
	chunk->paircount = 0;
	chunk->pairslots = size;

	if (parent != NULL)
		parent->next = (void*)chunk;

	return chunk;
}
/*
 * Free a whole chain of pair chunks, starting at `list` (may be NULL).
 *
 * For every chunk, the pairs in use are cleaned with cleanpair() and the
 * chunk itself is released. The chain is walked with a loop rather than
 * recursively, so stack usage no longer grows with the number of chunks.
 */
static void deletepairlist(ppairlist_t list)
{
	size_t i;
	ppairlist_t following;

	while (list)
	{
		/* read the link before the chunk is released */
		following = list->next;

		for (i=0; i < list->paircount; i++)
			cleanpair((list->pairs)+i);

		ECOFREE(list,"Delete pair list");
		list = following;
	}
}
static int cmppair(const void* p1,const void*p2)
{
ppair_t pr1,pr2;
pr1=(ppair_t)p1;
pr2=(ppair_t)p2;
if (pr1->p1 < pr2->p1) return -1;
if (pr1->p1 > pr2->p1) return 1;
if (pr1->asdirect1 < pr2->asdirect1) return -1;
if (pr1->asdirect1 > pr2->asdirect1) return 1;
if (pr1->p2 < pr2->p2) return -1;
if (pr1->p2 > pr2->p2) return 1;
if (pr1->asdirect2 < pr2->asdirect2) return -1;
if (pr1->asdirect2 > pr2->asdirect2) return 1;
return 0;
}
/**
 * Look a pair up in a pair tree without modifying the tree.
 *
 * The lookup uses tfind(). The previous tsearch() call inserted the
 * address of the by-value parameter `key` whenever the pair was absent,
 * leaving a pointer to a dead stack object inside the tree.
 *
 * @param key       pair to look for (compared with cmppair)
 * @param pairlist  tree to search
 * @return          the stored pair equal to `key`, or NULL when the tree
 *                  is empty or holds no such pair
 */
ppair_t pairintree (pair_t key,
                    ppairtree_t pairlist)
{
	ppair_t *node;

	if (!pairlist->tree)
		return NULL;

	node = (ppair_t*)tfind((const void *)(&key),
	                       &(pairlist->tree),
	                       cmppair);

	return node ? *node : NULL;
}
/**
 * Return the stored pair equal to `key`, inserting it first when needed.
 *
 * `key` is copied into the next free slot of the last chunk (a new chunk
 * of 100 slots is chained when the last one is full) and that slot is
 * handed to tsearch(). The slot is kept, i.e. the chunk counter advances,
 * only when the tree returns that very slot; otherwise the slot stays free
 * and the already stored pair is returned.
 */
ppair_t insertpair(pair_t key,
                   ppairtree_t list)
{
	ppair_t  slot;
	ppair_t *node;

	if (list->last->paircount == list->last->pairslots)
	{
		list->last->next = newpairlist(list->last,100);
		list->last = list->last->next;
	}

	slot  = &(list->last->pairs[list->last->paircount]);
	*slot = key;

	node = (ppair_t*)tsearch((const void *)slot,
	                         &(list->tree),
	                         cmppair);

	if (*node == slot)
		list->last->paircount++;

	return *node;
}
/**
 * Initialise a pair tree.
 *
 * @param tree  structure to initialise, or NULL to have one allocated
 * @return      the initialised tree: one empty chunk of 300 slots, no
 *              search tree yet, and a count of 0
 */
ppairtree_t initpairtree(ppairtree_t tree)
{
	ppairtree_t fresh = tree;

	if (fresh == NULL)
		fresh = ECOMALLOC(sizeof(pairtree_t),"Cannot allocate pair tree");

	fresh->tree  = NULL;
	fresh->count = 0;
	fresh->first = newpairlist(NULL,300);
	fresh->last  = fresh->first;

	return fresh;
}

17
src/libecoprimer/queue.P Normal file
View File

@ -0,0 +1,17 @@
queue.o queue.P : queue.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -0,0 +1,17 @@
readdnadb.o readdnadb.P : readdnadb.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -0,0 +1,10 @@
smothsort.o smothsort.P : smothsort.c /usr/include/assert.h /usr/include/sys/cdefs.h \
/usr/include/stdio.h /usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/sys/types.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/sys/_structs.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

View File

@ -0,0 +1,17 @@
sortmatch.o sortmatch.P : sortmatch.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -0,0 +1,17 @@
sortword.o sortword.P : sortword.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -0,0 +1,18 @@
strictprimers.o strictprimers.P : strictprimers.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/string.h /usr/include/math.h \
/usr/include/architecture/i386/math.h

224
src/libecoprimer/taxstats.c Normal file
View File

@ -0,0 +1,224 @@
/*
* taxstats.c
*
* Created on: 12 mars 2009
* Author: coissac
*/
#include <search.h>
#include "ecoprimer.h"
static int cmptaxon(const void *t1, const void* t2);
/*
 * tsearch() comparator for taxids stored directly in the key pointer.
 * The two pointers are never dereferenced: their values, taken as size_t,
 * are compared. Returns -1, 0 or +1.
 */
static int cmptaxon(const void *t1, const void* t2)
{
	const size_t left  = (size_t)t1;
	const size_t right = (size_t)t2;

	return (left > right) - (left < right);
}
/**
 * Count distinct taxids, keeping the set in a function-local static tree.
 *
 *  - taxid == -1 : reset; the set is emptied and 0 is returned.
 *  - taxid  >  0 : the taxid is added to the set if it is not there yet.
 *  - any other value leaves the set untouched, so it can be used to read
 *    the current count.
 *
 * The reset removes the nodes one by one with tdelete(). Passing the tree
 * root to ECOFREE, as was done before, released at most the root node and
 * leaked every other node of the tree.
 *
 * The state is static: the function is not reentrant.
 *
 * @return the number of distinct taxids recorded since the last reset
 */
int32_t counttaxon(int32_t taxid)
{
	static void* taxontree=NULL;
	static int32_t taxoncount=0;
	void *key;

	if (taxid==-1)
	{
		/* the first field of a tree node is its key: always delete the root */
		while (taxontree)
			tdelete(*(void**)taxontree,&taxontree,cmptaxon);

		taxoncount=0;
		return 0;
	}

	if (taxid > 0)
	{
		key = (void*)((size_t)taxid);

		if (!tfind(key,&taxontree,cmptaxon))
		{
			tsearch(key,&taxontree,cmptaxon);
			taxoncount++;
		}
	}

	return taxoncount;
}
/**
 * Flag every sequence as example / counterexample, attach to it the taxid
 * of its taxon at the rank options->taxonrankidx (-1 when there is none),
 * and count the distinct taxa at that rank.
 *
 * First pass: sets isexample and ranktaxonid, and counts the taxa of the
 * example sequences into options->intaxa.
 * Second pass: counts the taxa of the counterexample sequences into
 * options->outtaxa.
 *
 * @return options->outtaxa + options->intaxa
 */
int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
                       poptions_t options)
{
	uint32_t idx;
	ecotx_t *seqtaxon;
	ecotx_t *ranked;

	counttaxon(-1);

	for (idx = 0; idx < seqdbsize; idx++)
	{
		seqtaxon = &(taxonomy->taxons->taxon[seqdb[idx]->taxid]);
		seqdb[idx]->isexample = isGoodTaxon(taxonomy,seqdb[idx]->taxid,options);
		ranked = eco_findtaxonatrank(seqtaxon,options->taxonrankidx);

		if (!ranked)
		{
			/* no ancestor at the requested rank */
			seqdb[idx]->ranktaxonid = -1;
			continue;
		}

		seqdb[idx]->ranktaxonid = ranked->taxid;

		if (seqdb[idx]->isexample)
			options->intaxa = counttaxon(ranked->taxid);
	}

	counttaxon(-1);

	for (idx = 0; idx < seqdbsize; idx++)
	{
		if (seqdb[idx]->isexample || seqdb[idx]->ranktaxonid < 0)
			continue;

		options->outtaxa = counttaxon(seqdb[idx]->ranktaxonid);
	}

	return options->outtaxa + options->intaxa;
}
/**
 * Compute the taxonomic coverage of a primer pair.
 *
 * Counts the distinct rank taxa among the example sequences amplified by
 * the pair (stored in pair->intaxa) and among the counterexample ones
 * (stored in pair->outtaxa).
 *
 * Sequences whose ranktaxonid is negative are skipped: -1 is both the
 * "no taxon at this rank" marker and the reset command of counttaxon(),
 * so forwarding it wiped the count gathered so far.
 *
 * @param pair     pair whose amplifias are inspected; intaxa/outtaxa are set
 * @param options  provides intaxa, the number of example taxa in the database
 * @return         pair->intaxa / options->intaxa, or 0 when options->intaxa
 *                 is 0 (avoids a division by zero)
 */
float taxonomycoverage(ppair_t pair, poptions_t options)
{
	int32_t seqcount;
	int32_t i;
	int32_t incount=0;
	int32_t outcount=0;

	seqcount=pair->pcr.ampcount;

	counttaxon(-1);
	for (i=0; i < seqcount; i++)
		if (pair->pcr.amplifias[i].sequence->isexample
		    && pair->pcr.amplifias[i].sequence->ranktaxonid >= 0)
			incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);

	counttaxon(-1);
	for (i=0; i < seqcount; i++)
		if (!pair->pcr.amplifias[i].sequence->isexample
		    && pair->pcr.amplifias[i].sequence->ranktaxonid >= 0)
			outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);

	pair->intaxa=incount;
	pair->outtaxa=outcount;

	if (options->intaxa == 0)
		return 0.0f;

	return (float)incount/options->intaxa;
}
/*
 * tsearch() comparator ordering amptotaxon_t records by amplifia content.
 *
 * Same strand flag on both records: the two amplifias are compared
 * character by character from index 0, and the longer one ranks after the
 * shorter one when one is a prefix of the other.
 *
 * Different strand flags: the records are arranged so that pampf1 is the
 * one whose strand flag is set; pampf1 is read forward while pampf2 is
 * read with a decreasing index and each of its characters is passed
 * through ecoComplementChar(). `chd` records that the operands were
 * swapped so that the sign of the result is flipped back.
 *
 * Returns -1, 0 or 1.
 *
 * NOTE(review): ecoComplementChar() returns 4 for every value >= 4, so the
 * opposite-strand comparison presumably expects amplifia bytes to be 0..3
 * codes - confirm the encoding of the sequence buffer.
 */
static int cmpamp(const void *ampf1, const void* ampf2)
{
int i;
int j = 0;
int incr = 1;
char cd1;
char cd2;
int chd = 0;
int len = 0;
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
if (pampf1->strand != pampf2->strand)
{
/* opposite strands: walk the second operand backwards */
incr = -1;
/* NOTE(review): the start index is taken from the length of the FIRST
 * argument, before the possible swap below. After a swap it matches the
 * record read backwards; without a swap it is the length of the other
 * record - confirm this is intended when lengths differ. */
j = pampf1->length - 1;
if (pampf2->strand)
{
/* make pampf1 the record whose strand flag is set */
pampf1 = (pamptotaxon_t) ampf2;
pampf2 = (pamptotaxon_t) ampf1;
chd = 1;
}
}
/* only the common length is compared character by character */
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
for (i = 0; i < len; i++, j += incr)
{
cd1 = pampf1->amplifia[i];
if (incr == -1)
cd2 = ecoComplementChar(pampf2->amplifia[j]);
else
cd2 = pampf2->amplifia[j];
/* chd flips the sign back when the operands were swapped */
if (cd1 < cd2) return chd ? 1: -1;
if (cd2 < cd1) return chd ? -1: 1;
}
/* equal over the common length: the longer amplifia ranks last */
if (pampf1->length > pampf2->length) return chd ? -1: 1;
if (pampf2->length > pampf1->length) return chd ? 1: -1;
return 0;
}
/**
 * twalk() action: add the taxid stored in the visited node to the set
 * maintained by counttaxon().
 *
 * twalk() hands over the address of the tree node, whose first field is
 * the key; the taxid is therefore read through `node`. The previous code
 * used the node address itself as the taxid.
 *
 * Each node is handled once: on `postorder` for internal nodes and on
 * `leaf` for leaves. Only positive taxids are forwarded, because -1 is the
 * reset command of counttaxon() and must not be triggered by a stored key.
 *
 * @param node   address of the visited tree node
 * @param order  visit kind reported by twalk()
 * @param level  depth of the node (unused)
 */
void twalkaction (const void *node, VISIT order, int level)
{
	int32_t taxid;

	(void)level;

	if (order != postorder && order != leaf)
		return;

	taxid = (int32_t)(size_t)(*(void * const *)node);

	if (taxid > 0)
		counttaxon(taxid);
}
/**
 * Count the taxa that a primer pair does not identify unambiguously.
 *
 * The amplifias of the pair are grouped by content (cmpamp) and, for each
 * distinct amplifia, the set of rank taxids of the sequences producing it
 * is collected. Every taxid belonging to an amplifia shared by more than
 * one taxid is then fed to counttaxon(); the resulting count is stored in
 * pair->notwellidentifiedtaxa.
 *
 * Changes with respect to the previous version:
 *  - the amplifia tree and every per-amplifia taxon tree are emptied with
 *    tdelete() before returning (they were leaked on each call);
 *  - taxoncount / taxontree of a candidate slot are initialised explicitly
 *    instead of relying on ECOMALLOC returning zeroed memory.
 *
 * @param pair  pair to analyse; only notwellidentifiedtaxa is modified
 */
void taxonomyspecificity (ppair_t pair)
{
	uint32_t i;
	uint32_t ampfindex = 0;
	int32_t taxid;
	void *ampftree = NULL;
	pamptotaxon_t pcurrentampf;
	pamptotaxon_t *ptmp;

	pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree");

	for (i = 0; i < pair->pcr.ampcount; i++)
	{
		/* Fill the next free slot as a candidate; it is kept only when
		   no equal amplifia is known yet, otherwise it is reused. */
		pcurrentampf = &ampfwithtaxtree[ampfindex];
		pcurrentampf->amplifia = pair->pcr.amplifias[i].amplifia;
		pcurrentampf->strand = pair->pcr.amplifias[i].strand;
		pcurrentampf->length = pair->pcr.amplifias[i].length;
		pcurrentampf->taxoncount = 0;
		pcurrentampf->taxontree = NULL;

		taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;

		ptmp = tfind((const void*)pcurrentampf, &ampftree, cmpamp);
		if (ptmp == NULL)
		{
			tsearch((void*)pcurrentampf,&ampftree,cmpamp);
			ampfindex++;
		}
		else
			pcurrentampf = *ptmp;

		/* record the taxid in the set attached to this amplifia */
		if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
		{
			pcurrentampf->taxoncount++;
			tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
		}
	}

	counttaxon(-1);

	for (i = 0; i < ampfindex; i++)
	{
		if (ampfwithtaxtree[i].taxoncount > 1)
			twalk(ampfwithtaxtree[i].taxontree, twalkaction);
	}

	/* -2 changes nothing in counttaxon(): it only reads the current count */
	pair->notwellidentifiedtaxa = counttaxon(-2);

	/* Empty the trees; the first field of a tree node is its key, so the
	   root is deleted until the tree is empty. The amplifia tree must go
	   before the table its keys point into. */
	while (ampftree)
		tdelete(*(void**)ampftree, &ampftree, cmpamp);

	for (i = 0; i < ampfindex; i++)
	{
		while (ampfwithtaxtree[i].taxontree)
			tdelete(*(void**)(ampfwithtaxtree[i].taxontree),
			        &(ampfwithtaxtree[i].taxontree),
			        cmptaxon);
	}

	ECOFREE (ampfwithtaxtree, "Free amplifia table");
}