diff --git a/src/ecoprimer.c b/src/ecoprimer.c index 613f486..465ca10 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -51,7 +51,7 @@ static void PrintHelp() PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n"); PP "-r : [R]estricts the search to the given taxonomic id.\n\n"); PP "-c : Consider that the database sequences are [c]ircular\n\n"); - PP "-3 : Three prime strict match\n\n"); +// PP "-3 : Three prime strict match\n\n"); PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n"); PP "-s : [S]ensitivity quorum\n\n"); PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n"); @@ -147,21 +147,44 @@ void printcurrenttimeinmilli() void printapair(int32_t index,ppair_t pair, poptions_t options) { + bool_t asdirect1=pair->asdirect1; + bool_t asdirect2=pair->asdirect2; + bool_t asdirecttmp; + word_t w1=pair->p1->word; + word_t w2=pair->p2->word; + word_t wtmp; + bool_t good1=pair->p1->good; + bool_t good2=pair->p2->good; + bool_t goodtmp; + + if (!asdirect1) + w1=ecoComplementWord(w1,options->primer_length); + + if (!asdirect2) + w2=ecoComplementWord(w2,options->primer_length); + + + if (w2 < w1) + { + wtmp=w1; + w1=w2; + w2=wtmp; + + asdirecttmp=asdirect1; + asdirect1=asdirect2; + asdirect2=asdirecttmp; + + goodtmp=good1; + good1=good2; + good2=goodtmp; + } printf("%6d\t",index); - if (pair->asdirect1) - printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length)); - else - printf("%s\t",ecoUnhashWord(ecoComplementWord(pair->p1->word, - options->primer_length),options->primer_length)); - if (pair->asdirect2) - printf("%s",ecoUnhashWord(pair->p2->word,options->primer_length)); - else - printf("%s",ecoUnhashWord(ecoComplementWord(pair->p2->word, - options->primer_length),options->primer_length)); + printf("%s\t",ecoUnhashWord(w1,options->primer_length)); + printf("%s",ecoUnhashWord(w2,options->primer_length)); - printf("\t%c%c", "bG"[(int)pair->p1->good],"bG"[(int)pair->p2->good]); + printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]); printf("\t%d", pair->inexample); @@ -246,13 +269,16 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti } -void printpairs (ppairtree_t pairs, poptions_t options) +void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy) { ppair_t* sortedpairs; ppair_t* index; ppairlist_t pl; size_t i,j; size_t count; + char *taxon[]={"taxon","taxa"}; + ecotx_t *current_taxon; + //printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n"); @@ -274,6 +300,65 @@ void printpairs (ppairtree_t pairs, poptions_t options) count=filterandsortpairs(sortedpairs,pairs->count,options); + fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count); + + printf("#\n"); + printf("# ecoPrimer version %s\n",VERSION); + printf("# Rank level optimisation : %s\n", options->taxonrank); + printf("# max error count by oligonucleotide : %d\n",options->error_max); + printf("#\n"); + + if (options->r) + { + printf("# Restricted to %s:\n",taxon[(options->r>1) ? 1:0]); + for(i=0;i<(uint32_t)options->r;i++) + { + current_taxon=eco_findtaxonbytaxid(taxonomy,options->restricted_taxid[i]); + printf("# %d : %s (%s)\n", current_taxon->taxid, + current_taxon->name, + taxonomy->ranks->label[current_taxon->rank] + ); + } + printf("#\n"); + } + if (options->g) + { + printf("# Ignore %s:\n",taxon[(options->g>1) ? 1:0]); + for(i=0;i<(uint32_t)options->r;i++) + { + current_taxon=eco_findtaxonbytaxid(taxonomy,options->ignored_taxid[i]); + printf("# %d : %s (%s)\n", current_taxon->taxid, + current_taxon->name, + taxonomy->ranks->label[current_taxon->rank] + ); + } + printf("#\n"); + } + printf("# strict primer quorum : %3.2f\n",options->strict_quorum); + printf("# example quorum : %3.2f\n",options->sensitivity_quorum); + if (options->g + options->r) + printf("# counterexample quorum : %3.2f\n",options->false_positive_quorum); + + printf("#\n"); + printf("# database : %s\n",options->prefix); + printf("# Database is constituted of %5d examples corresponding to %5d %s\n",options->insamples, + options->intaxa,options->taxonrank); + printf("# and %5d counterexamples corresponding to %5d %s\n",options->outsamples, + options->outtaxa,options->taxonrank); + printf("#\n"); + + if (options->lmin && options->lmax) + printf("# amplifiat length between [%d,%d] bp\n",options->lmin,options->lmax); + else if (options->lmin) + printf("# amplifiat length larger than %d bp\n",options->lmin); + else if (options->lmax) + printf("# amplifiat length smaller than %d bp\n",options->lmax); + if (options->circular) + printf("# DB sequences are considered as circular\n"); + else + printf("# DB sequences are considered as linear\n"); + printf("#\n"); + for (i=0;i < count;i++) printapair(i,sortedpairs[i],options); @@ -392,12 +477,12 @@ int main(int argc, char **argv) sscanf(optarg,"%d",&(options.error_max)); break; - - /* ------------------------ */ - case '3': /* three prime strict match */ - /* ------------------------ */ - sscanf(optarg,"%d",&(options.strict_three_prime)); - break; +// +// /* ------------------------ */ +// case '3': /* three prime strict match */ +// /* ------------------------ */ +// sscanf(optarg,"%d",&(options.strict_three_prime)); +// break; /* -------------------- */ case 'q': /* strict matching quorum */ @@ -555,7 +640,7 @@ int main(int argc, char **argv) // setoktaxforspecificity (&pairs); - printpairs (pairs, &options); + printpairs (pairs, &options,taxonomy); diff --git a/src/libecoprimer/apat_search.c b/src/libecoprimer/apat_search.c index 0ed417a..22ae905 100644 --- a/src/libecoprimer/apat_search.c +++ b/src/libecoprimer/apat_search.c @@ -103,8 +103,7 @@ int32_t ManberSub(pecoseq_t pseq,ppattern_t pat, for (e = 0, pr = r + 3 ; e <= param->maxerr ; e++, pr += 2) *pr = cmask; - cmask = ~ param->omask; // A VOIR !!!!! omask (new) doit tre compos de + et - ... Ancien omask : bits - + cmask = ~ param->omask; /* init. scan */ data = (uint8_t*)(pseq->SQ); diff --git a/src/libecoprimer/hashsequence.c b/src/libecoprimer/hashsequence.c index af89025..207848f 100644 --- a/src/libecoprimer/hashsequence.c +++ b/src/libecoprimer/hashsequence.c @@ -124,7 +124,7 @@ uint32_t ecoCompactHashSequence(pword_t table,uint32_t size) for (i=0,j=0; j < size;j++) { - if (table[j]!=current) + if (WORD(table[j])!=current) { current =table[j]; table[i]=current; @@ -203,6 +203,6 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word) char ecoComplementChar(char base) { - return (base < 4)? !base & 3: 4; + return (base < 4)? !base & 3: 4; } diff --git a/src/libecoprimer/strictprimers.c b/src/libecoprimer/strictprimers.c index 7cca4e8..175379a 100644 --- a/src/libecoprimer/strictprimers.c +++ b/src/libecoprimer/strictprimers.c @@ -14,7 +14,7 @@ pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ uint32_t i; uint32_t buffsize; //wordcount_t t; - + if (!table) table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure"); @@ -89,6 +89,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize, uint32_t exampleCount,poptions_t options) { uint32_t i; + bool_t first=TRUE; pwordcount_t strictprimers=NULL; uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum); @@ -104,7 +105,15 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize, { if (database[i]->isexample) { - if (strictprimers->size) + if (first) + { + strictprimers = initCountTable(strictprimers,options->primer_length, + options->circular, + options->doublestrand, + database[i]); + first=FALSE; + } + else { uint32_t s; s = strictprimers->size; @@ -115,12 +124,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize, exampleCount, sequenceQuorum, database[i]); - } - else - strictprimers = initCountTable(strictprimers,options->primer_length, - options->circular, - options->doublestrand, - database[i]); + }; } else