Added code for building sets of primers and -p command line option
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@279 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "libecoprimer/ecoprimer.h"
|
||||
#include "libecoprimer/PrimerSets.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
@ -74,6 +75,7 @@ static void PrintHelp()
|
||||
PP "-A : Print the list of all identifier of sequences present in the database\n\n");
|
||||
PP "-f : Remove data mining step during strict primer identification\n\n");
|
||||
PP "-v : Store statistic file about memory usage during strict primer identification\n\n");
|
||||
PP "-p : Print sets of primers\n\n");
|
||||
PP "\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "Table result description : \n");
|
||||
@ -132,9 +134,9 @@ void initoptions(poptions_t options)
|
||||
options->refseq=NULL;
|
||||
options->circular=0;
|
||||
options->doublestrand=1;
|
||||
options->strict_quorum=0.7;
|
||||
options->strict_quorum=0.3;
|
||||
options->strict_exclude_quorum=0.1;
|
||||
options->sensitivity_quorum=0.9;
|
||||
options->sensitivity_quorum=0.3;
|
||||
options->false_positive_quorum=0.1;
|
||||
options->strict_three_prime=0;
|
||||
options->r=0;
|
||||
@ -146,6 +148,7 @@ void initoptions(poptions_t options)
|
||||
options->saltmethod = SALT_METHOD_SANTALUCIA;
|
||||
options->salt = DEF_SALT;
|
||||
options->printAC=FALSE;
|
||||
options->print_sets_of_primers = FALSE;
|
||||
}
|
||||
|
||||
void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
@ -314,7 +317,7 @@ static int cmpprintedpairs(const void* p1,const void* p2)
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
|
||||
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options, pecodnadb_t seqdb)
|
||||
{
|
||||
uint32_t i,j;
|
||||
float q,qfp;
|
||||
@ -329,6 +332,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
qfp = (float)sortedpairs[i]->outexample/options->outsamples;
|
||||
else qfp=0.0;
|
||||
|
||||
sortedpairs[i]->wellIdentifiedSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
|
||||
sortedpairs[i]->quorumin = q;
|
||||
sortedpairs[i]->quorumout = qfp;
|
||||
sortedpairs[i]->yule = q - qfp;
|
||||
@ -337,8 +341,10 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
if (q > options->sensitivity_quorum &&
|
||||
qfp < options->false_positive_quorum)
|
||||
{
|
||||
//TR 05/09/10 - wellIdentified needed for primer sets
|
||||
sortedpairs[j]->wellIdentifiedSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array");
|
||||
(void)taxonomycoverage(sortedpairs[j],options);
|
||||
taxonomyspecificity(sortedpairs[j]);
|
||||
taxonomyspecificity(sortedpairs[j], seqdb, options->dbsize);
|
||||
j++;
|
||||
}
|
||||
|
||||
@ -348,7 +354,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
}
|
||||
|
||||
|
||||
void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
|
||||
void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy, pecodnadb_t seqdb)
|
||||
{
|
||||
ppair_t* sortedpairs;
|
||||
ppair_t* index;
|
||||
@ -357,7 +363,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
|
||||
size_t count;
|
||||
char *taxon[]={"taxon","taxa"};
|
||||
ecotx_t *current_taxon;
|
||||
|
||||
pairset pair_sets;
|
||||
|
||||
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
|
||||
|
||||
@ -377,7 +383,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
|
||||
for (i=0;i<pl->paircount;i++,j++)
|
||||
sortedpairs[j]=pl->pairs+i;
|
||||
|
||||
count=filterandsortpairs(sortedpairs,pairs->count,options);
|
||||
count=filterandsortpairs(sortedpairs,pairs->count,options, seqdb);
|
||||
getThermoProperties(sortedpairs, count, options);
|
||||
|
||||
fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count);
|
||||
@ -441,8 +447,12 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
|
||||
|
||||
for (i=0;i < count;i++)
|
||||
printapair(i,sortedpairs[i],options);
|
||||
|
||||
|
||||
|
||||
if (options->print_sets_of_primers == TRUE)
|
||||
{
|
||||
pair_sets = build_primers_set (sortedpairs, count, seqdb, options);
|
||||
some_other_set_possibilities (&pair_sets, sortedpairs, count, seqdb, options);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -528,7 +538,7 @@ int main(int argc, char **argv)
|
||||
|
||||
initoptions(&options);
|
||||
|
||||
while ((carg = getopt(argc, argv, "hAfvcUDSE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) {
|
||||
while ((carg = getopt(argc, argv, "hAfvcUDSpE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* ---------------------------- */
|
||||
@ -691,6 +701,12 @@ int main(int argc, char **argv)
|
||||
options.circular = 1;
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'p': /* print sets of primers */
|
||||
/* --------------------------------- */
|
||||
options.print_sets_of_primers = TRUE;
|
||||
break;
|
||||
|
||||
case '?': /* bad option */
|
||||
/* -------------------- */
|
||||
errflag++;
|
||||
@ -799,9 +815,9 @@ int main(int argc, char **argv)
|
||||
|
||||
fprintf(stderr,"\n");
|
||||
|
||||
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
|
||||
|
||||
printpairs (pairs, &options,taxonomy);
|
||||
|
||||
|
||||
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
|
||||
printpairs (pairs, &options,taxonomy, seqdb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -14,7 +14,8 @@ SOURCES = goodtaxon.c \
|
||||
pairs.c \
|
||||
taxstats.c \
|
||||
apat_search.c \
|
||||
filtering.c
|
||||
filtering.c \
|
||||
PrimerSets.c
|
||||
|
||||
SRCS=$(SOURCES)
|
||||
|
||||
|
388
src/libecoprimer/PrimerSets.c
Normal file
388
src/libecoprimer/PrimerSets.c
Normal file
@ -0,0 +1,388 @@
|
||||
#include <stdint.h>
|
||||
|
||||
//#include "ecoprimer.h"
|
||||
#include "PrimerSets.h"
|
||||
|
||||
int32_t counttaxon(int32_t taxid);
|
||||
|
||||
float get_set_coverage (pairset *p_set, SetParams *pparams, int32_t pidx_toexclude)
|
||||
{
|
||||
int32_t i, j;
|
||||
float cov;
|
||||
pprimer_t prmr1;
|
||||
pprimer_t prmr2;
|
||||
int32_t s_intaxa = 0;
|
||||
|
||||
counttaxon(-1);
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
{
|
||||
if (p_set->set_pairs[i] == -1 || pidx_toexclude == i) continue;
|
||||
|
||||
prmr1 = pparams->sortedpairs[p_set->set_pairs[i]]->p1;
|
||||
prmr2 = pparams->sortedpairs[p_set->set_pairs[i]]->p2;
|
||||
for (j = 0; j < pparams->options->dbsize; j++)
|
||||
if ((prmr1->directCount[j] > 0 || prmr1->reverseCount[j] > 0)
|
||||
&& (prmr2->directCount[j] > 0 || prmr2->reverseCount[j] > 0)
|
||||
&& pparams->seqdb[j]->isexample && pparams->seqdb[j]->ranktaxonid > 0)
|
||||
{
|
||||
s_intaxa = counttaxon(pparams->seqdb[j]->ranktaxonid);
|
||||
}
|
||||
}
|
||||
//fprintf(stderr, "%d/%d\n", s_intaxa, pparams->options->intaxa);
|
||||
cov = s_intaxa*1.0/(pparams->options->intaxa*1.0);
|
||||
|
||||
return cov;
|
||||
}
|
||||
|
||||
void set_cov_spc (pairset *p_set, SetParams *pparams)
|
||||
{
|
||||
int32_t i;
|
||||
int32_t ssp = 0;
|
||||
|
||||
for (i = 0; i < pparams->options->dbsize; i++)
|
||||
if (p_set->set_wellIdentifiedTaxa[i] == 1) ssp++;
|
||||
|
||||
//set specificity
|
||||
p_set->set_specificity = ssp*1.0/(pparams->options->insamples*1.0);
|
||||
|
||||
//set coverage
|
||||
p_set->set_coverage = get_set_coverage (p_set, pparams, -1);
|
||||
}
|
||||
|
||||
int ok_to_add (int id, pairset *pair_set)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
if (pair_set->set_pairs[i] == id) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
//1. Add in set the first pair having highiest specificity
|
||||
//2. For the sequences not WI by primers in set, calculate count for each pair equal to number of seqs WI by it
|
||||
//3. Take the pair with highiest such count and see its links with primers already in set,
|
||||
//4. If no/insufficient links, take next pair
|
||||
//5. repeate 3,4 untill pair gets added in set
|
||||
//6. Repeate 2 to 5 until set complete
|
||||
|
||||
pairset build_primers_set (ppair_t* sortedpairs, int32_t sorted_count, pecodnadb_t seqdb,
|
||||
poptions_t options)
|
||||
{
|
||||
SetParams params;
|
||||
pairset pair_set;
|
||||
int32_t i;
|
||||
int32_t pset_idx;
|
||||
int32_t prb_idx;
|
||||
int *pair_wi_count_sorted_ids;
|
||||
|
||||
params.sortedpairs = sortedpairs;
|
||||
params.sorted_count = sorted_count;
|
||||
params.seqdb = seqdb;
|
||||
params.options = options;
|
||||
|
||||
memset (&pair_set, 0, sizeof(pairset));
|
||||
pair_set.set_wellIdentifiedTaxa = (int *) ECOMALLOC(options->dbsize*sizeof (int),
|
||||
"Could not allocate memory for pair set");
|
||||
|
||||
pair_wi_count_sorted_ids = (int *) ECOMALLOC(sorted_count*sizeof (int),
|
||||
"Could not allocate memory for pair_wi_count_sorted");
|
||||
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
pair_set.set_pairs[i] = -1;
|
||||
|
||||
pset_idx = 0;
|
||||
prb_idx = 0;
|
||||
//add first pair by default, this should be the one having highiest specificty
|
||||
add_pair_in_set (&pair_set, pset_idx, prb_idx, ¶ms);
|
||||
pset_idx++;
|
||||
|
||||
while (pset_idx < PRIMERS_IN_SET_COUNT)
|
||||
{
|
||||
//get a sorted list of pair ids with the pair well identifying most of the remaining seqs at top
|
||||
get_next_pair_options (pair_wi_count_sorted_ids, &pair_set, ¶ms);
|
||||
|
||||
if (pair_wi_count_sorted_ids[0] == 0)
|
||||
{
|
||||
fprintf (stderr, "No further pair found, total so far %d\n", pset_idx);
|
||||
break;
|
||||
}
|
||||
else
|
||||
fprintf (stderr, "Some counts found\n");
|
||||
|
||||
|
||||
for (prb_idx = 0; prb_idx < sorted_count; prb_idx++)
|
||||
{
|
||||
if (pair_wi_count_sorted_ids[prb_idx])
|
||||
if (ok_to_add (pair_wi_count_sorted_ids[prb_idx], &pair_set))
|
||||
{
|
||||
//fprintf (stderr, "Oktoadd\n");
|
||||
float ldist = get_links_distribution (pair_wi_count_sorted_ids[prb_idx], &pair_set, ¶ms);
|
||||
fprintf (stderr, "ldist:%f\n", ldist);
|
||||
if (ldist >= 0.7)
|
||||
{
|
||||
add_pair_in_set (&pair_set, pset_idx, pair_wi_count_sorted_ids[prb_idx], ¶ms);
|
||||
pset_idx++;
|
||||
break;
|
||||
}
|
||||
else
|
||||
fprintf (stderr, "ldist:%f\n", ldist);
|
||||
}
|
||||
}
|
||||
|
||||
if (prb_idx == sorted_count)
|
||||
{
|
||||
fprintf (stderr, "No further pair found, total so far %d\n", pset_idx);
|
||||
break;
|
||||
}
|
||||
|
||||
if (pair_set.set_specificity == 1.0)
|
||||
{
|
||||
fprintf (stderr, "Set Complete with total primers: %d\n", pset_idx);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
get_set_mean_cov_stats (&pair_set, ¶ms);
|
||||
|
||||
return pair_set;
|
||||
}
|
||||
|
||||
float get_links_distribution (int prb_idx, pairset *pair_set, SetParams *pparams)
|
||||
{
|
||||
int i, j;
|
||||
int *pair_link_count;
|
||||
int *pwi;
|
||||
int *pswi;
|
||||
float pcnt = 0.0;
|
||||
float pscnt = 0.0;
|
||||
|
||||
pair_link_count = (int *) ECOMALLOC(PRIMERS_IN_SET_COUNT*sizeof (int),
|
||||
"Could not allocate memory for pair_link_count");
|
||||
|
||||
pwi = pparams->sortedpairs[prb_idx]->wellIdentifiedSeqs;
|
||||
//fprintf(stderr, "a,");
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
{
|
||||
pair_link_count[i] = 0;
|
||||
if (pair_set->set_pairs[i] != -1)
|
||||
{
|
||||
//fprintf(stderr, "b,");
|
||||
pswi = pparams->sortedpairs[pair_set->set_pairs[i]]->wellIdentifiedSeqs;
|
||||
for (j = 0; j < pparams->options->dbsize; j++)
|
||||
if (pwi[j] == 1 && pwi[j] == pswi[j])
|
||||
pair_link_count[i] += 1;
|
||||
}
|
||||
}
|
||||
//fprintf(stderr, "c,");
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
{
|
||||
if (pair_set->set_pairs[i] != -1)
|
||||
pscnt++;
|
||||
if (pair_link_count[i] > 0)
|
||||
pcnt++;
|
||||
}
|
||||
ECOFREE (pair_link_count, "Could not free memory for pair_link_count");
|
||||
//fprintf(stderr, "d,");
|
||||
return (pcnt/pscnt);
|
||||
}
|
||||
|
||||
|
||||
void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams)
|
||||
{
|
||||
int *pair_count;
|
||||
int32_t i, j;
|
||||
int max;
|
||||
int tmp;
|
||||
|
||||
pair_count = (int *) ECOMALLOC(pparams->sorted_count*sizeof (int),
|
||||
"Could not allocate memory for pair_count");
|
||||
|
||||
memset (pair_wi_count_sorted_ids, 0, pparams->sorted_count*sizeof(int));
|
||||
|
||||
for (i = 0; i < pparams->options->dbsize; i++)
|
||||
{
|
||||
if (pair_set->set_wellIdentifiedTaxa[i] == 1) continue;
|
||||
|
||||
for (j = 0; j < pparams->sorted_count; j++)
|
||||
{
|
||||
if (pparams->sortedpairs[j]->wellIdentifiedSeqs[i] == 1)
|
||||
pair_count[j] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
//set pair ids
|
||||
for (j = 0; j < pparams->sorted_count; j++)
|
||||
pair_wi_count_sorted_ids[j] = j;
|
||||
|
||||
//set count of primers already in set to zero (it should already be zero) just for testing
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
if (pair_set->set_pairs[i] != -1)
|
||||
pair_count[pair_set->set_pairs[i]] = 0;
|
||||
|
||||
//sort two arrays in descending wrt count
|
||||
for (i = 0; i < pparams->sorted_count - 1; i++)
|
||||
{
|
||||
max = i;
|
||||
for (j = i + 1; j < pparams->sorted_count; j++)
|
||||
if (pair_count[max] < pair_count[j])
|
||||
max = j;
|
||||
|
||||
if (max > i)
|
||||
{
|
||||
tmp = pair_count[i];
|
||||
pair_count[i] = pair_count[max];
|
||||
pair_count[max] = tmp;
|
||||
|
||||
tmp = pair_wi_count_sorted_ids[i];
|
||||
pair_wi_count_sorted_ids[i] = pair_wi_count_sorted_ids[max];
|
||||
pair_wi_count_sorted_ids[max] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < pparams->sorted_count - 1; i++)
|
||||
if (pair_count[i] == 0)
|
||||
pair_wi_count_sorted_ids[i] = 0;
|
||||
//else
|
||||
// fprintf (stderr, "%d:%d, ", i, pair_count[i]);
|
||||
|
||||
ECOFREE (pair_count, "Could not free memory for pair_count");
|
||||
}
|
||||
|
||||
void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams)
|
||||
{
|
||||
int *pwi;
|
||||
int32_t i;
|
||||
|
||||
pair_set->set_pairs[pset_idx] = prb_idx;
|
||||
pwi = pparams->sortedpairs[prb_idx]->wellIdentifiedSeqs;
|
||||
for (i = 0; i < pparams->options->dbsize; i++)
|
||||
if (pwi[i] == 1)
|
||||
pair_set->set_wellIdentifiedTaxa[i] = 1;
|
||||
|
||||
set_cov_spc (pair_set, pparams);
|
||||
}
|
||||
|
||||
void get_set_mean_cov_stats (pairset *pair_set, SetParams *pparams)
|
||||
{
|
||||
int32_t i, j, k;
|
||||
int interseq_vals[PRIMERS_IN_SET_COUNT*PRIMERS_IN_SET_COUNT];
|
||||
int interseq_cnt = 0;
|
||||
double msum;
|
||||
int *p1wi;
|
||||
int *p2wi;
|
||||
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
{
|
||||
if (pair_set->set_pairs[i] == -1) continue;
|
||||
p1wi = pparams->sortedpairs[pair_set->set_pairs[i]]->wellIdentifiedSeqs;
|
||||
for (j = i+1; j < PRIMERS_IN_SET_COUNT; j++)
|
||||
{
|
||||
if (pair_set->set_pairs[j] == -1) continue;
|
||||
p2wi = pparams->sortedpairs[pair_set->set_pairs[j]]->wellIdentifiedSeqs;
|
||||
interseq_vals[interseq_cnt] = 0;
|
||||
|
||||
for (k = 0; k < pparams->options->dbsize; k++)
|
||||
if (p1wi[k] == 1 && p2wi[k] == 1)
|
||||
interseq_vals[interseq_cnt]++;
|
||||
interseq_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
//calculate mean
|
||||
msum = 0;
|
||||
pair_set->set_score = 0;
|
||||
pair_set->set_lmean = 0;
|
||||
pair_set->set_lcov = -1;
|
||||
if (interseq_cnt == 0) return;
|
||||
|
||||
for (i = 0; i < interseq_cnt; i++)
|
||||
msum += interseq_vals[i];
|
||||
pair_set->set_lmean = msum/interseq_cnt;
|
||||
|
||||
msum = 0;
|
||||
for (i = 0; i < interseq_cnt; i++)
|
||||
msum += (interseq_vals[i] - pair_set->set_lmean)*(interseq_vals[i] - pair_set->set_lmean);
|
||||
pair_set->set_lcov = msum/interseq_cnt;
|
||||
|
||||
if (pair_set->set_lcov != 0)
|
||||
pair_set->set_score = (pair_set->set_coverage*pair_set->set_specificity*pair_set->set_lmean)/pair_set->set_lcov;
|
||||
}
|
||||
|
||||
void reset_set_props (pairset *pair_set, SetParams *pparams)
|
||||
{
|
||||
int *pwi;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
{
|
||||
if (pair_set->set_pairs[i] == -1) continue;
|
||||
pwi = pparams->sortedpairs[pair_set->set_pairs[i]]->wellIdentifiedSeqs;
|
||||
for (j = 0; j < pparams->options->dbsize; j++)
|
||||
if (pwi[j] == 1)
|
||||
pair_set->set_wellIdentifiedTaxa[j] = 1;
|
||||
}
|
||||
|
||||
set_cov_spc (pair_set, pparams);
|
||||
get_set_mean_cov_stats (pair_set, pparams);
|
||||
}
|
||||
|
||||
void print_set_info (pairset *pair_set, SetParams *pparams)
|
||||
{
|
||||
int i;
|
||||
int printed1st = 0;
|
||||
|
||||
printf ("%f\t%f\t%f\t%f\t%f\t", pair_set->set_specificity,
|
||||
pair_set->set_coverage,pair_set->set_lmean,
|
||||
pair_set->set_lcov,pair_set->set_score);
|
||||
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
{
|
||||
if (pair_set->set_pairs[i] == -1) continue;
|
||||
|
||||
if (printed1st)
|
||||
printf (":%d", pair_set->set_pairs[i]);
|
||||
else
|
||||
printf ("%d", pair_set->set_pairs[i]);
|
||||
printed1st = 1;
|
||||
}
|
||||
printf ("\n");
|
||||
}
|
||||
|
||||
void some_other_set_possibilities (pairset *pair_set,
|
||||
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options)
|
||||
{
|
||||
SetParams params;
|
||||
pairset tmp_pair_set;
|
||||
int i, j;
|
||||
int *wi;
|
||||
|
||||
params.sortedpairs = sortedpairs;
|
||||
params.sorted_count = sorted_count;
|
||||
params.seqdb = seqdb;
|
||||
params.options = options;
|
||||
wi = (int *) ECOMALLOC(options->dbsize*sizeof (int),
|
||||
"Could not allocate memory for pair set wi");
|
||||
//print stats for first original set
|
||||
printf ("\nspecificity\tcoverage\tmean\tcovariance\tscore\tprimers\n");
|
||||
print_set_info (pair_set, ¶ms);
|
||||
|
||||
for (i = 0; i < PRIMERS_IN_SET_COUNT; i++)
|
||||
{
|
||||
if (pair_set->set_pairs[i] == -1) continue;
|
||||
|
||||
for (j = 0; j < sorted_count; j++)
|
||||
{
|
||||
if (ok_to_add (j, pair_set))
|
||||
{
|
||||
tmp_pair_set = *pair_set;
|
||||
tmp_pair_set.set_pairs[i] = j;
|
||||
memset (wi, 0, options->dbsize*sizeof (int));
|
||||
tmp_pair_set.set_wellIdentifiedTaxa = wi;
|
||||
reset_set_props (&tmp_pair_set, ¶ms);
|
||||
print_set_info (&tmp_pair_set, ¶ms);
|
||||
}
|
||||
}
|
||||
}
|
||||
ECOFREE (wi, "Could not free memory for pair set wi");
|
||||
}
|
||||
|
41
src/libecoprimer/PrimerSets.h
Normal file
41
src/libecoprimer/PrimerSets.h
Normal file
@ -0,0 +1,41 @@
|
||||
#ifndef PRIMERSETS_H_
|
||||
#define PRIMERSETS_H_
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
#define PRIMERS_IN_SET_COUNT 10
|
||||
|
||||
typedef struct {
|
||||
int *set_wellIdentifiedTaxa;
|
||||
int32_t set_pairs[PRIMERS_IN_SET_COUNT];
|
||||
float set_specificity;
|
||||
float set_coverage;
|
||||
float set_lmean;
|
||||
float set_lcov;
|
||||
float set_score;
|
||||
}pairset;
|
||||
|
||||
typedef struct{
|
||||
ppair_t* sortedpairs;
|
||||
int32_t sorted_count;
|
||||
pecodnadb_t seqdb;
|
||||
poptions_t options;
|
||||
}SetParams;
|
||||
|
||||
typedef struct{
|
||||
float t_spc; //specificity contribution
|
||||
float t_cov; //coverage contribution
|
||||
float t_lmd; //link spread difference
|
||||
float len; //length
|
||||
float score; //score
|
||||
}primerscore;
|
||||
|
||||
void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams);
|
||||
void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams);
|
||||
float get_links_distribution (int prb_idx, pairset *prob_set, SetParams *pparams);
|
||||
pairset build_primers_set (ppair_t* sortedpairs, int32_t sorted_count, pecodnadb_t seqdb,
|
||||
poptions_t options);
|
||||
void get_set_mean_cov_stats (pairset *prob_set, SetParams *pparams);
|
||||
void some_other_set_possibilities (pairset *pair_set,
|
||||
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||
#endif
|
@ -173,6 +173,9 @@ typedef struct {
|
||||
uint32_t outtaxa; //< counterexample taxa count
|
||||
uint32_t notwellidentifiedtaxa;
|
||||
|
||||
int *wellIdentifiedSeqs; //< an array having elements equla to total seqs
|
||||
// values are either 0 or 1, if seq is well identified
|
||||
// its 1 else 0
|
||||
|
||||
// these statistics are relative to inexample sequences
|
||||
|
||||
@ -286,6 +289,7 @@ typedef struct {
|
||||
int saltmethod;
|
||||
float salt;
|
||||
PNNParams pnparm;
|
||||
bool_t print_sets_of_primers;
|
||||
} options_t, *poptions_t;
|
||||
|
||||
typedef ecoseq_t **pecodnadb_t;
|
||||
@ -347,7 +351,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb,
|
||||
poptions_t options);
|
||||
float taxonomycoverage(ppair_t pair, poptions_t options);
|
||||
char ecoComplementChar(char base);
|
||||
void taxonomyspecificity (ppair_t pair);
|
||||
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize);
|
||||
|
||||
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
|
||||
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum);
|
||||
|
@ -179,9 +179,16 @@ void twalkaction (const void *node, VISIT order, int level)
|
||||
counttaxon(taxid);
|
||||
}
|
||||
|
||||
void taxonomyspecificity (ppair_t pair)
|
||||
int32_t gtxid;
|
||||
void twalkaction2 (const void *node, VISIT order, int level)
|
||||
{
|
||||
uint32_t i;
|
||||
int32_t *pt = (int32_t *) node;
|
||||
gtxid = *pt;
|
||||
}
|
||||
|
||||
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
||||
{
|
||||
uint32_t i, j;
|
||||
uint32_t ampfindex = 0;
|
||||
int32_t taxid;
|
||||
void *ampftree = NULL;
|
||||
@ -219,11 +226,31 @@ void taxonomyspecificity (ppair_t pair)
|
||||
}
|
||||
}
|
||||
|
||||
memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int));
|
||||
counttaxon(-1);
|
||||
for (i = 0; i < ampfindex; i++)
|
||||
{
|
||||
if (ampfwithtaxtree[i].taxoncount > 1)
|
||||
twalk(ampfwithtaxtree[i].taxontree, twalkaction);
|
||||
//TR 5/9/10 - added code for well identified seqs
|
||||
else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/
|
||||
{
|
||||
gtxid = -1;
|
||||
twalk(ampfwithtaxtree[i].taxontree, twalkaction2);
|
||||
|
||||
if (gtxid != -1)
|
||||
{
|
||||
for (j = 0; j < seqdbsize; j++)
|
||||
if (seqdb[j]->ranktaxonid == gtxid
|
||||
&&(pair->p1->directCount[j] > 0
|
||||
|| pair->p1->reverseCount[j] > 0)
|
||||
&& (pair->p2->directCount[j] > 0
|
||||
|| pair->p2->reverseCount[j] > 0))
|
||||
{
|
||||
pair->wellIdentifiedSeqs[j] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pair->notwellidentifiedtaxa = counttaxon(-2);
|
||||
|
Reference in New Issue
Block a user