diff --git a/.pydevproject b/.pydevproject index 6725cde..ac30bc7 100644 --- a/.pydevproject +++ b/.pydevproject @@ -1,7 +1,7 @@ - + -Default +Python 2.6 python 2.6 diff --git a/src/ecoprimer.c b/src/ecoprimer.c index 2342318..c55dda3 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -53,37 +53,50 @@ static void PrintHelp() PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n"); PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n"); PP "-h : [H]elp - print help\n\n"); - PP "-i : [I]gnore the given taxonomy id.\n\n"); + PP "-i : [I]gnore the given taxonomy id (define the counterexample taxon set).\n\n"); PP "-l : minimum [L]ength : define the minimum amplication length. \n\n"); PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n"); - PP "-r : [R]estricts the search to the given taxonomic id.\n\n"); + PP "-r : [R]estricts the search to the given taxonomic id (restrict the example taxon set).\n\n"); PP "-c : Consider that the database sequences are [c]ircular\n\n"); -// PP "-3 : Three prime strict match\n\n"); + PP "-3 : Three prime strict match\n\n"); PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n"); PP "-s : [S]ensitivity quorum\n\n"); PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n"); PP "-x : false positive quorum\n\n"); PP "-D : set in [d]ouble strand mode\n\n"); + PP "-O : set the primer length (default 18) \n\n"); PP "-S : Set in [s]ingle strand mode\n\n"); + PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1 or OWCZARZY:2, default=1)\n\n"); + PP "-a : Salt contentration in M for Tm computation (default 0.05 M)\n\n"); PP "-U : No multi match\n\n"); + PP "-U : Define the [R]eference sequence identifier (must be part of example set)\n\n"); + PP "-A : Print the list of all identifier of sequences present in the database\n\n"); + PP "-f : Remove data mining step during strict primer identification\n\n"); + PP "-v : Store statistic file about memory usage during strict primer identification\n\n"); PP "\n"); PP "------------------------------------------\n"); PP "Table result description : \n"); - PP "column 1 : serial number\n"); - PP "column 2 : primer1\n"); - PP "column 3 : primer2\n"); - PP "column 4 : good/bad\n"); - PP "column 5 : in sequence count\n"); - PP "column 6 : out sequence count\n"); - PP "column 7 : yule\n"); - PP "column 8 : in taxa count\n"); - PP "column 9 : out taxa count\n"); - PP "column 10 : coverage\n"); - PP "column 11 : unambiguously identified taxa\n"); - PP "column 12 : specificity\n"); - PP "column 13 : minimum amplified length\n"); - PP "column 14 : maximum amplified length\n"); - PP "column 15 : average amplified length\n"); + PP "column 1 : serial number\n"); + PP "column 2 : primer1\n"); + PP "column 3 : primer2\n"); + PP "column 4 : primer1 Tm without mismatch\n"); + PP "column 5 : primer1 lowest Tm against exemple sequences\n"); + PP "column 6 : primer2 Tm without mismatch\n"); + PP "column 7 : primer2 lowest Tm against exemple sequences\n"); + PP "column 8 : primer1 G+C count\n"); + PP "column 9 : primer2 G+C count\n"); + PP "column 10 : good/bad\n"); + PP "column 11 : amplified example sequence count\n"); + PP "column 12 : amplified counterexample sequence count\n"); + PP "column 13 : yule\n"); + PP "column 14 : amplified example taxa count\n"); + PP "column 15 : amplified counterexample taxa count\n"); + PP "column 16 : ratio of amplified example taxa versus all example taxa (Bc index)\n"); + PP "column 17 : unambiguously identified example taxa count\n"); + PP "column 18 : ratio of specificity unambiguously identified example taxa versus all example taxa (Bs index)\n"); + PP "column 19 : minimum amplified length\n"); + PP "column 20 : maximum amplified length\n"); + PP "column 21 : average amplified length\n"); PP "------------------------------------------\n"); PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n"); PP "------------------------------------------\n\n"); @@ -690,7 +703,7 @@ int main(int argc, char **argv) fprintf(stderr,"Reading sequence database ...\n"); - seqdb = readdnadb(options.prefix,&seqdbsize); + seqdb = readdnadb(options.prefix,taxonomy,&seqdbsize, &options); if (options.printAC) { diff --git a/src/libecoprimer/ecoprimer.h b/src/libecoprimer/ecoprimer.h index a25c8ad..195ae57 100644 --- a/src/libecoprimer/ecoprimer.h +++ b/src/libecoprimer/ecoprimer.h @@ -290,9 +290,11 @@ typedef ecoseq_t **pecodnadb_t; void sortword(pword_t table,uint32_t N); -pecodnadb_t readdnadb(const char *name, uint32_t *size); +pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, uint32_t *size,poptions_t options); int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options); +int isExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options); +int isCounterExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options); uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq); pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size,int32_t *neededWords,uint32_t neededWordCount, diff --git a/src/libecoprimer/goodtaxon.c b/src/libecoprimer/goodtaxon.c index f4d7598..09b47ce 100644 --- a/src/libecoprimer/goodtaxon.c +++ b/src/libecoprimer/goodtaxon.c @@ -25,3 +25,31 @@ int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options) return result; } + +int isExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options) +{ + int result; + + result=( (options->r == 0) || (eco_is_taxid_included(taxonomy, + options->restricted_taxid, + options->r, + taxonomy->taxons->taxon[taxon].taxid) + )); + + return result; +} + + +int isCounterExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options) +{ + int result; + + result=((options->g != 0) && (eco_is_taxid_included(taxonomy, + options->ignored_taxid, + options->g, + taxonomy->taxons->taxon[taxon].taxid) + )); + + + return result; +} diff --git a/src/libecoprimer/readdnadb.c b/src/libecoprimer/readdnadb.c index 98867dd..ced45c5 100644 --- a/src/libecoprimer/readdnadb.c +++ b/src/libecoprimer/readdnadb.c @@ -7,7 +7,7 @@ #include "ecoprimer.h" -pecodnadb_t readdnadb(const char *name, uint32_t *size) +pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, uint32_t *size,poptions_t options) { ecoseq_t *seq; uint32_t buffsize=100; @@ -18,15 +18,24 @@ pecodnadb_t readdnadb(const char *name, uint32_t *size) for(seq=ecoseq_iterator(name), *size=0; seq; - seq=ecoseq_iterator(NULL), (*size)++ + seq=ecoseq_iterator(NULL) ) { - if (*size==buffsize) - { - buffsize*=2; - db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory"); - } - db[*size]=seq; + if (isExampleTaxon(taxonomy,seq->taxid,options) || + isCounterExampleTaxon(taxonomy,seq->taxid,options)) + { + if (*size==buffsize) + { + buffsize*=2; + db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory"); + } + db[*size]=seq; + (*size)++; + } + else + { + delete_ecoseq(seq); + } }; db = ECOREALLOC(db,(*size)*sizeof(ecoseq_t*),"I cannot allocate db memory"); @@ -47,4 +56,4 @@ void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize) fprintf (stderr, "seq %d = %s\n", i, ch); } exit (0); -} \ No newline at end of file +} diff --git a/src/libthermo/libthermo.a b/src/libthermo/libthermo.a deleted file mode 100644 index be29e70..0000000 Binary files a/src/libthermo/libthermo.a and /dev/null differ