Second run of patch

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@204 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2009-04-23 11:38:52 +00:00
parent 313dd59f5a
commit c36bc5e838
4 changed files with 120 additions and 32 deletions

View File

@ -51,7 +51,7 @@ static void PrintHelp()
PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n");
PP "-r : [R]estricts the search to the given taxonomic id.\n\n");
PP "-c : Consider that the database sequences are [c]ircular\n\n");
PP "-3 : Three prime strict match\n\n");
// PP "-3 : Three prime strict match\n\n");
PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n");
PP "-s : [S]ensitivity quorum\n\n");
PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n");
@ -147,21 +147,44 @@ void printcurrenttimeinmilli()
void printapair(int32_t index,ppair_t pair, poptions_t options)
{
bool_t asdirect1=pair->asdirect1;
bool_t asdirect2=pair->asdirect2;
bool_t asdirecttmp;
word_t w1=pair->p1->word;
word_t w2=pair->p2->word;
word_t wtmp;
bool_t good1=pair->p1->good;
bool_t good2=pair->p2->good;
bool_t goodtmp;
if (!asdirect1)
w1=ecoComplementWord(w1,options->primer_length);
if (!asdirect2)
w2=ecoComplementWord(w2,options->primer_length);
if (w2 < w1)
{
wtmp=w1;
w1=w2;
w2=wtmp;
asdirecttmp=asdirect1;
asdirect1=asdirect2;
asdirect2=asdirecttmp;
goodtmp=good1;
good1=good2;
good2=goodtmp;
}
printf("%6d\t",index);
if (pair->asdirect1)
printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length));
else
printf("%s\t",ecoUnhashWord(ecoComplementWord(pair->p1->word,
options->primer_length),options->primer_length));
if (pair->asdirect2)
printf("%s",ecoUnhashWord(pair->p2->word,options->primer_length));
else
printf("%s",ecoUnhashWord(ecoComplementWord(pair->p2->word,
options->primer_length),options->primer_length));
printf("%s\t",ecoUnhashWord(w1,options->primer_length));
printf("%s",ecoUnhashWord(w2,options->primer_length));
printf("\t%c%c", "bG"[(int)pair->p1->good],"bG"[(int)pair->p2->good]);
printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]);
printf("\t%d", pair->inexample);
@ -246,13 +269,16 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
}
void printpairs (ppairtree_t pairs, poptions_t options)
void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
{
ppair_t* sortedpairs;
ppair_t* index;
ppairlist_t pl;
size_t i,j;
size_t count;
char *taxon[]={"taxon","taxa"};
ecotx_t *current_taxon;
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
@ -274,6 +300,65 @@ void printpairs (ppairtree_t pairs, poptions_t options)
count=filterandsortpairs(sortedpairs,pairs->count,options);
fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count);
printf("#\n");
printf("# ecoPrimer version %s\n",VERSION);
printf("# Rank level optimisation : %s\n", options->taxonrank);
printf("# max error count by oligonucleotide : %d\n",options->error_max);
printf("#\n");
if (options->r)
{
printf("# Restricted to %s:\n",taxon[(options->r>1) ? 1:0]);
for(i=0;i<(uint32_t)options->r;i++)
{
current_taxon=eco_findtaxonbytaxid(taxonomy,options->restricted_taxid[i]);
printf("# %d : %s (%s)\n", current_taxon->taxid,
current_taxon->name,
taxonomy->ranks->label[current_taxon->rank]
);
}
printf("#\n");
}
if (options->g)
{
printf("# Ignore %s:\n",taxon[(options->g>1) ? 1:0]);
for(i=0;i<(uint32_t)options->r;i++)
{
current_taxon=eco_findtaxonbytaxid(taxonomy,options->ignored_taxid[i]);
printf("# %d : %s (%s)\n", current_taxon->taxid,
current_taxon->name,
taxonomy->ranks->label[current_taxon->rank]
);
}
printf("#\n");
}
printf("# strict primer quorum : %3.2f\n",options->strict_quorum);
printf("# example quorum : %3.2f\n",options->sensitivity_quorum);
if (options->g + options->r)
printf("# counterexample quorum : %3.2f\n",options->false_positive_quorum);
printf("#\n");
printf("# database : %s\n",options->prefix);
printf("# Database is constituted of %5d examples corresponding to %5d %s\n",options->insamples,
options->intaxa,options->taxonrank);
printf("# and %5d counterexamples corresponding to %5d %s\n",options->outsamples,
options->outtaxa,options->taxonrank);
printf("#\n");
if (options->lmin && options->lmax)
printf("# amplifiat length between [%d,%d] bp\n",options->lmin,options->lmax);
else if (options->lmin)
printf("# amplifiat length larger than %d bp\n",options->lmin);
else if (options->lmax)
printf("# amplifiat length smaller than %d bp\n",options->lmax);
if (options->circular)
printf("# DB sequences are considered as circular\n");
else
printf("# DB sequences are considered as linear\n");
printf("#\n");
for (i=0;i < count;i++)
printapair(i,sortedpairs[i],options);
@ -392,12 +477,12 @@ int main(int argc, char **argv)
sscanf(optarg,"%d",&(options.error_max));
break;
/* ------------------------ */
case '3': /* three prime strict match */
/* ------------------------ */
sscanf(optarg,"%d",&(options.strict_three_prime));
break;
//
// /* ------------------------ */
// case '3': /* three prime strict match */
// /* ------------------------ */
// sscanf(optarg,"%d",&(options.strict_three_prime));
// break;
/* -------------------- */
case 'q': /* strict matching quorum */
@ -555,7 +640,7 @@ int main(int argc, char **argv)
// setoktaxforspecificity (&pairs);
printpairs (pairs, &options);
printpairs (pairs, &options,taxonomy);

View File

@ -103,8 +103,7 @@ int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
for (e = 0, pr = r + 3 ; e <= param->maxerr ; e++, pr += 2)
*pr = cmask;
cmask = ~ param->omask; // A VOIR !!!!! omask (new) doit <20>tre compos<6F> de + et - ... Ancien omask : bits
cmask = ~ param->omask;
/* init. scan */
data = (uint8_t*)(pseq->SQ);

View File

@ -124,7 +124,7 @@ uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
for (i=0,j=0; j < size;j++)
{
if (table[j]!=current)
if (WORD(table[j])!=current)
{
current =table[j];
table[i]=current;
@ -203,6 +203,6 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word)
char ecoComplementChar(char base)
{
return (base < 4)? !base & 3: 4;
return (base < 4)? !base & 3: 4;
}

View File

@ -14,7 +14,7 @@ pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
uint32_t i;
uint32_t buffsize;
//wordcount_t t;
if (!table)
table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");
@ -89,6 +89,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options)
{
uint32_t i;
bool_t first=TRUE;
pwordcount_t strictprimers=NULL;
uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);
@ -104,7 +105,15 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
{
if (database[i]->isexample)
{
if (strictprimers->size)
if (first)
{
strictprimers = initCountTable(strictprimers,options->primer_length,
options->circular,
options->doublestrand,
database[i]);
first=FALSE;
}
else
{
uint32_t s;
s = strictprimers->size;
@ -115,12 +124,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
exampleCount,
sequenceQuorum,
database[i]);
}
else
strictprimers = initCountTable(strictprimers,options->primer_length,
options->circular,
options->doublestrand,
database[i]);
};
}
else