diff --git a/src/ecoprimer.c b/src/ecoprimer.c index aacb5c2..71220ab 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -6,6 +6,7 @@ */ #include "libecoprimer/ecoprimer.h" +#include "libecoprimer/PrimerSets.h" #include #include #include @@ -74,6 +75,7 @@ static void PrintHelp() PP "-A : Print the list of all identifier of sequences present in the database\n\n"); PP "-f : Remove data mining step during strict primer identification\n\n"); PP "-v : Store statistic file about memory usage during strict primer identification\n\n"); + PP "-p : Print sets of primers\n\n"); PP "\n"); PP "------------------------------------------\n"); PP "Table result description : \n"); @@ -132,9 +134,9 @@ void initoptions(poptions_t options) options->refseq=NULL; options->circular=0; options->doublestrand=1; - options->strict_quorum=0.7; + options->strict_quorum=0.3; options->strict_exclude_quorum=0.1; - options->sensitivity_quorum=0.9; + options->sensitivity_quorum=0.3; options->false_positive_quorum=0.1; options->strict_three_prime=0; options->r=0; @@ -146,6 +148,7 @@ void initoptions(poptions_t options) options->saltmethod = SALT_METHOD_SANTALUCIA; options->salt = DEF_SALT; options->printAC=FALSE; + options->print_sets_of_primers = FALSE; } void printapair(int32_t index,ppair_t pair, poptions_t options) @@ -314,7 +317,7 @@ static int cmpprintedpairs(const void* p1,const void* p2) return 0; } -uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options) +uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options, pecodnadb_t seqdb) { uint32_t i,j; float q,qfp; @@ -329,6 +332,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti qfp = (float)sortedpairs[i]->outexample/options->outsamples; else qfp=0.0; + sortedpairs[i]->wellIdentifiedSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets sortedpairs[i]->quorumin = q; sortedpairs[i]->quorumout = qfp; sortedpairs[i]->yule = q - qfp; @@ -337,8 +341,10 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti if (q > options->sensitivity_quorum && qfp < options->false_positive_quorum) { + //TR 05/09/10 - wellIdentified needed for primer sets + sortedpairs[j]->wellIdentifiedSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array"); (void)taxonomycoverage(sortedpairs[j],options); - taxonomyspecificity(sortedpairs[j]); + taxonomyspecificity(sortedpairs[j], seqdb, options->dbsize); j++; } @@ -348,7 +354,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti } -void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) +void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy, pecodnadb_t seqdb) { ppair_t* sortedpairs; ppair_t* index; @@ -357,7 +363,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) size_t count; char *taxon[]={"taxon","taxa"}; ecotx_t *current_taxon; - + pairset pair_sets; //printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n"); @@ -377,7 +383,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) for (i=0;ipaircount;i++,j++) sortedpairs[j]=pl->pairs+i; - count=filterandsortpairs(sortedpairs,pairs->count,options); + count=filterandsortpairs(sortedpairs,pairs->count,options, seqdb); getThermoProperties(sortedpairs, count, options); fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count); @@ -441,8 +447,12 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) for (i=0;i < count;i++) printapair(i,sortedpairs[i],options); - - + + if (options->print_sets_of_primers == TRUE) + { + pair_sets = build_primers_set (sortedpairs, count, seqdb, options); + some_other_set_possibilities (&pair_sets, sortedpairs, count, seqdb, options); + } } @@ -528,7 +538,7 @@ int main(int argc, char **argv) initoptions(&options); - while ((carg = getopt(argc, argv, "hAfvcUDSE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) { + while ((carg = getopt(argc, argv, "hAfvcUDSpE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) { switch (carg) { /* ---------------------------- */ @@ -691,6 +701,12 @@ int main(int argc, char **argv) options.circular = 1; break; + /* -------------------- */ + case 'p': /* print sets of primers */ + /* --------------------------------- */ + options.print_sets_of_primers = TRUE; + break; + case '?': /* bad option */ /* -------------------- */ errflag++; @@ -799,9 +815,9 @@ int main(int argc, char **argv) fprintf(stderr,"\n"); - pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options); - - printpairs (pairs, &options,taxonomy); - + + pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options); + printpairs (pairs, &options,taxonomy, seqdb); + return 0; } diff --git a/src/libecoprimer/Makefile b/src/libecoprimer/Makefile index 5b33fb9..1d93012 100644 --- a/src/libecoprimer/Makefile +++ b/src/libecoprimer/Makefile @@ -14,7 +14,8 @@ SOURCES = goodtaxon.c \ pairs.c \ taxstats.c \ apat_search.c \ - filtering.c + filtering.c \ + PrimerSets.c SRCS=$(SOURCES) diff --git a/src/libecoprimer/PrimerSets.c b/src/libecoprimer/PrimerSets.c new file mode 100644 index 0000000..30ea575 --- /dev/null +++ b/src/libecoprimer/PrimerSets.c @@ -0,0 +1,388 @@ +#include + +//#include "ecoprimer.h" +#include "PrimerSets.h" + +int32_t counttaxon(int32_t taxid); + +float get_set_coverage (pairset *p_set, SetParams *pparams, int32_t pidx_toexclude) +{ + int32_t i, j; + float cov; + pprimer_t prmr1; + pprimer_t prmr2; + int32_t s_intaxa = 0; + + counttaxon(-1); + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + { + if (p_set->set_pairs[i] == -1 || pidx_toexclude == i) continue; + + prmr1 = pparams->sortedpairs[p_set->set_pairs[i]]->p1; + prmr2 = pparams->sortedpairs[p_set->set_pairs[i]]->p2; + for (j = 0; j < pparams->options->dbsize; j++) + if ((prmr1->directCount[j] > 0 || prmr1->reverseCount[j] > 0) + && (prmr2->directCount[j] > 0 || prmr2->reverseCount[j] > 0) + && pparams->seqdb[j]->isexample && pparams->seqdb[j]->ranktaxonid > 0) + { + s_intaxa = counttaxon(pparams->seqdb[j]->ranktaxonid); + } + } + //fprintf(stderr, "%d/%d\n", s_intaxa, pparams->options->intaxa); + cov = s_intaxa*1.0/(pparams->options->intaxa*1.0); + + return cov; +} + +void set_cov_spc (pairset *p_set, SetParams *pparams) +{ + int32_t i; + int32_t ssp = 0; + + for (i = 0; i < pparams->options->dbsize; i++) + if (p_set->set_wellIdentifiedTaxa[i] == 1) ssp++; + + //set specificity + p_set->set_specificity = ssp*1.0/(pparams->options->insamples*1.0); + + //set coverage + p_set->set_coverage = get_set_coverage (p_set, pparams, -1); +} + +int ok_to_add (int id, pairset *pair_set) +{ + int i; + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + if (pair_set->set_pairs[i] == id) return 0; + + return 1; +} + +//1. Add in set the first pair having highiest specificity +//2. For the sequences not WI by primers in set, calculate count for each pair equal to number of seqs WI by it +//3. Take the pair with highiest such count and see its links with primers already in set, +//4. If no/insufficient links, take next pair +//5. repeate 3,4 untill pair gets added in set +//6. Repeate 2 to 5 until set complete + +pairset build_primers_set (ppair_t* sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, + poptions_t options) +{ + SetParams params; + pairset pair_set; + int32_t i; + int32_t pset_idx; + int32_t prb_idx; + int *pair_wi_count_sorted_ids; + + params.sortedpairs = sortedpairs; + params.sorted_count = sorted_count; + params.seqdb = seqdb; + params.options = options; + + memset (&pair_set, 0, sizeof(pairset)); + pair_set.set_wellIdentifiedTaxa = (int *) ECOMALLOC(options->dbsize*sizeof (int), + "Could not allocate memory for pair set"); + + pair_wi_count_sorted_ids = (int *) ECOMALLOC(sorted_count*sizeof (int), + "Could not allocate memory for pair_wi_count_sorted"); + + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + pair_set.set_pairs[i] = -1; + + pset_idx = 0; + prb_idx = 0; + //add first pair by default, this should be the one having highiest specificty + add_pair_in_set (&pair_set, pset_idx, prb_idx, ¶ms); + pset_idx++; + + while (pset_idx < PRIMERS_IN_SET_COUNT) + { + //get a sorted list of pair ids with the pair well identifying most of the remaining seqs at top + get_next_pair_options (pair_wi_count_sorted_ids, &pair_set, ¶ms); + + if (pair_wi_count_sorted_ids[0] == 0) + { + fprintf (stderr, "No further pair found, total so far %d\n", pset_idx); + break; + } + else + fprintf (stderr, "Some counts found\n"); + + + for (prb_idx = 0; prb_idx < sorted_count; prb_idx++) + { + if (pair_wi_count_sorted_ids[prb_idx]) + if (ok_to_add (pair_wi_count_sorted_ids[prb_idx], &pair_set)) + { + //fprintf (stderr, "Oktoadd\n"); + float ldist = get_links_distribution (pair_wi_count_sorted_ids[prb_idx], &pair_set, ¶ms); + fprintf (stderr, "ldist:%f\n", ldist); + if (ldist >= 0.7) + { + add_pair_in_set (&pair_set, pset_idx, pair_wi_count_sorted_ids[prb_idx], ¶ms); + pset_idx++; + break; + } + else + fprintf (stderr, "ldist:%f\n", ldist); + } + } + + if (prb_idx == sorted_count) + { + fprintf (stderr, "No further pair found, total so far %d\n", pset_idx); + break; + } + + if (pair_set.set_specificity == 1.0) + { + fprintf (stderr, "Set Complete with total primers: %d\n", pset_idx); + break; + } + + } + get_set_mean_cov_stats (&pair_set, ¶ms); + + return pair_set; +} + +float get_links_distribution (int prb_idx, pairset *pair_set, SetParams *pparams) +{ + int i, j; + int *pair_link_count; + int *pwi; + int *pswi; + float pcnt = 0.0; + float pscnt = 0.0; + + pair_link_count = (int *) ECOMALLOC(PRIMERS_IN_SET_COUNT*sizeof (int), + "Could not allocate memory for pair_link_count"); + + pwi = pparams->sortedpairs[prb_idx]->wellIdentifiedSeqs; + //fprintf(stderr, "a,"); + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + { + pair_link_count[i] = 0; + if (pair_set->set_pairs[i] != -1) + { + //fprintf(stderr, "b,"); + pswi = pparams->sortedpairs[pair_set->set_pairs[i]]->wellIdentifiedSeqs; + for (j = 0; j < pparams->options->dbsize; j++) + if (pwi[j] == 1 && pwi[j] == pswi[j]) + pair_link_count[i] += 1; + } + } + //fprintf(stderr, "c,"); + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + { + if (pair_set->set_pairs[i] != -1) + pscnt++; + if (pair_link_count[i] > 0) + pcnt++; + } + ECOFREE (pair_link_count, "Could not free memory for pair_link_count"); + //fprintf(stderr, "d,"); + return (pcnt/pscnt); +} + + +void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams) +{ + int *pair_count; + int32_t i, j; + int max; + int tmp; + + pair_count = (int *) ECOMALLOC(pparams->sorted_count*sizeof (int), + "Could not allocate memory for pair_count"); + + memset (pair_wi_count_sorted_ids, 0, pparams->sorted_count*sizeof(int)); + + for (i = 0; i < pparams->options->dbsize; i++) + { + if (pair_set->set_wellIdentifiedTaxa[i] == 1) continue; + + for (j = 0; j < pparams->sorted_count; j++) + { + if (pparams->sortedpairs[j]->wellIdentifiedSeqs[i] == 1) + pair_count[j] += 1; + } + } + + //set pair ids + for (j = 0; j < pparams->sorted_count; j++) + pair_wi_count_sorted_ids[j] = j; + + //set count of primers already in set to zero (it should already be zero) just for testing + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + if (pair_set->set_pairs[i] != -1) + pair_count[pair_set->set_pairs[i]] = 0; + + //sort two arrays in descending wrt count + for (i = 0; i < pparams->sorted_count - 1; i++) + { + max = i; + for (j = i + 1; j < pparams->sorted_count; j++) + if (pair_count[max] < pair_count[j]) + max = j; + + if (max > i) + { + tmp = pair_count[i]; + pair_count[i] = pair_count[max]; + pair_count[max] = tmp; + + tmp = pair_wi_count_sorted_ids[i]; + pair_wi_count_sorted_ids[i] = pair_wi_count_sorted_ids[max]; + pair_wi_count_sorted_ids[max] = tmp; + } + } + + for (i = 0; i < pparams->sorted_count - 1; i++) + if (pair_count[i] == 0) + pair_wi_count_sorted_ids[i] = 0; + //else + // fprintf (stderr, "%d:%d, ", i, pair_count[i]); + + ECOFREE (pair_count, "Could not free memory for pair_count"); +} + +void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams) +{ + int *pwi; + int32_t i; + + pair_set->set_pairs[pset_idx] = prb_idx; + pwi = pparams->sortedpairs[prb_idx]->wellIdentifiedSeqs; + for (i = 0; i < pparams->options->dbsize; i++) + if (pwi[i] == 1) + pair_set->set_wellIdentifiedTaxa[i] = 1; + + set_cov_spc (pair_set, pparams); +} + +void get_set_mean_cov_stats (pairset *pair_set, SetParams *pparams) +{ + int32_t i, j, k; + int interseq_vals[PRIMERS_IN_SET_COUNT*PRIMERS_IN_SET_COUNT]; + int interseq_cnt = 0; + double msum; + int *p1wi; + int *p2wi; + + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + { + if (pair_set->set_pairs[i] == -1) continue; + p1wi = pparams->sortedpairs[pair_set->set_pairs[i]]->wellIdentifiedSeqs; + for (j = i+1; j < PRIMERS_IN_SET_COUNT; j++) + { + if (pair_set->set_pairs[j] == -1) continue; + p2wi = pparams->sortedpairs[pair_set->set_pairs[j]]->wellIdentifiedSeqs; + interseq_vals[interseq_cnt] = 0; + + for (k = 0; k < pparams->options->dbsize; k++) + if (p1wi[k] == 1 && p2wi[k] == 1) + interseq_vals[interseq_cnt]++; + interseq_cnt++; + } + } + + //calculate mean + msum = 0; + pair_set->set_score = 0; + pair_set->set_lmean = 0; + pair_set->set_lcov = -1; + if (interseq_cnt == 0) return; + + for (i = 0; i < interseq_cnt; i++) + msum += interseq_vals[i]; + pair_set->set_lmean = msum/interseq_cnt; + + msum = 0; + for (i = 0; i < interseq_cnt; i++) + msum += (interseq_vals[i] - pair_set->set_lmean)*(interseq_vals[i] - pair_set->set_lmean); + pair_set->set_lcov = msum/interseq_cnt; + + if (pair_set->set_lcov != 0) + pair_set->set_score = (pair_set->set_coverage*pair_set->set_specificity*pair_set->set_lmean)/pair_set->set_lcov; +} + +void reset_set_props (pairset *pair_set, SetParams *pparams) +{ + int *pwi; + int i, j; + + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + { + if (pair_set->set_pairs[i] == -1) continue; + pwi = pparams->sortedpairs[pair_set->set_pairs[i]]->wellIdentifiedSeqs; + for (j = 0; j < pparams->options->dbsize; j++) + if (pwi[j] == 1) + pair_set->set_wellIdentifiedTaxa[j] = 1; + } + + set_cov_spc (pair_set, pparams); + get_set_mean_cov_stats (pair_set, pparams); +} + +void print_set_info (pairset *pair_set, SetParams *pparams) +{ + int i; + int printed1st = 0; + + printf ("%f\t%f\t%f\t%f\t%f\t", pair_set->set_specificity, + pair_set->set_coverage,pair_set->set_lmean, + pair_set->set_lcov,pair_set->set_score); + + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + { + if (pair_set->set_pairs[i] == -1) continue; + + if (printed1st) + printf (":%d", pair_set->set_pairs[i]); + else + printf ("%d", pair_set->set_pairs[i]); + printed1st = 1; + } + printf ("\n"); +} + +void some_other_set_possibilities (pairset *pair_set, + ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options) +{ + SetParams params; + pairset tmp_pair_set; + int i, j; + int *wi; + + params.sortedpairs = sortedpairs; + params.sorted_count = sorted_count; + params.seqdb = seqdb; + params.options = options; + wi = (int *) ECOMALLOC(options->dbsize*sizeof (int), + "Could not allocate memory for pair set wi"); + //print stats for first original set + printf ("\nspecificity\tcoverage\tmean\tcovariance\tscore\tprimers\n"); + print_set_info (pair_set, ¶ms); + + for (i = 0; i < PRIMERS_IN_SET_COUNT; i++) + { + if (pair_set->set_pairs[i] == -1) continue; + + for (j = 0; j < sorted_count; j++) + { + if (ok_to_add (j, pair_set)) + { + tmp_pair_set = *pair_set; + tmp_pair_set.set_pairs[i] = j; + memset (wi, 0, options->dbsize*sizeof (int)); + tmp_pair_set.set_wellIdentifiedTaxa = wi; + reset_set_props (&tmp_pair_set, ¶ms); + print_set_info (&tmp_pair_set, ¶ms); + } + } + } + ECOFREE (wi, "Could not free memory for pair set wi"); +} + diff --git a/src/libecoprimer/PrimerSets.h b/src/libecoprimer/PrimerSets.h new file mode 100644 index 0000000..8e46432 --- /dev/null +++ b/src/libecoprimer/PrimerSets.h @@ -0,0 +1,41 @@ +#ifndef PRIMERSETS_H_ +#define PRIMERSETS_H_ + +#include "ecoprimer.h" + +#define PRIMERS_IN_SET_COUNT 10 + +typedef struct { + int *set_wellIdentifiedTaxa; + int32_t set_pairs[PRIMERS_IN_SET_COUNT]; + float set_specificity; + float set_coverage; + float set_lmean; + float set_lcov; + float set_score; +}pairset; + +typedef struct{ + ppair_t* sortedpairs; + int32_t sorted_count; + pecodnadb_t seqdb; + poptions_t options; +}SetParams; + +typedef struct{ + float t_spc; //specificity contribution + float t_cov; //coverage contribution + float t_lmd; //link spread difference + float len; //length + float score; //score +}primerscore; + +void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams); +void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams); +float get_links_distribution (int prb_idx, pairset *prob_set, SetParams *pparams); +pairset build_primers_set (ppair_t* sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, + poptions_t options); +void get_set_mean_cov_stats (pairset *prob_set, SetParams *pparams); +void some_other_set_possibilities (pairset *pair_set, + ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options); +#endif diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h index 0110e2c..2ad99bf 100644 --- a/src/libecoprimer/ecoprimer.h +++ b/src/libecoprimer/ecoprimer.h @@ -173,6 +173,9 @@ typedef struct { uint32_t outtaxa; //< counterexample taxa count uint32_t notwellidentifiedtaxa; + int *wellIdentifiedSeqs; //< an array having elements equla to total seqs + // values are either 0 or 1, if seq is well identified + // its 1 else 0 // these statistics are relative to inexample sequences @@ -286,6 +289,7 @@ typedef struct { int saltmethod; float salt; PNNParams pnparm; + bool_t print_sets_of_primers; } options_t, *poptions_t; typedef ecoseq_t **pecodnadb_t; @@ -347,7 +351,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb, poptions_t options); float taxonomycoverage(ppair_t pair, poptions_t options); char ecoComplementChar(char base); -void taxonomyspecificity (ppair_t pair); +void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize); int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize, uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum); diff --git a/src/libecoprimer/taxstats.c b/src/libecoprimer/taxstats.c index da7de6f..d1ca219 100644 --- a/src/libecoprimer/taxstats.c +++ b/src/libecoprimer/taxstats.c @@ -179,9 +179,16 @@ void twalkaction (const void *node, VISIT order, int level) counttaxon(taxid); } -void taxonomyspecificity (ppair_t pair) +int32_t gtxid; +void twalkaction2 (const void *node, VISIT order, int level) { - uint32_t i; + int32_t *pt = (int32_t *) node; + gtxid = *pt; +} + +void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize) +{ + uint32_t i, j; uint32_t ampfindex = 0; int32_t taxid; void *ampftree = NULL; @@ -219,11 +226,31 @@ void taxonomyspecificity (ppair_t pair) } } + memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int)); counttaxon(-1); for (i = 0; i < ampfindex; i++) { if (ampfwithtaxtree[i].taxoncount > 1) twalk(ampfwithtaxtree[i].taxontree, twalkaction); + //TR 5/9/10 - added code for well identified seqs + else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/ + { + gtxid = -1; + twalk(ampfwithtaxtree[i].taxontree, twalkaction2); + + if (gtxid != -1) + { + for (j = 0; j < seqdbsize; j++) + if (seqdb[j]->ranktaxonid == gtxid + &&(pair->p1->directCount[j] > 0 + || pair->p1->reverseCount[j] > 0) + && (pair->p2->directCount[j] > 0 + || pair->p2->reverseCount[j] > 0)) + { + pair->wellIdentifiedSeqs[j] = 1; + } + } + } } pair->notwellidentifiedtaxa = counttaxon(-2);