2009-03-04 22:32:55 +00:00
|
|
|
/*
|
|
|
|
* strictprimers.c
|
|
|
|
*
|
|
|
|
* Created on: 7 nov. 2008
|
|
|
|
* Author: coissac
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ecoprimer.h"
|
|
|
|
#include <string.h>
|
|
|
|
#include <math.h>
|
|
|
|
|
|
|
|
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq)
|
|
|
|
{
|
|
|
|
uint32_t i;
|
|
|
|
uint32_t buffsize;
|
|
|
|
//wordcount_t t;
|
2009-04-23 11:38:52 +00:00
|
|
|
|
2009-03-04 22:32:55 +00:00
|
|
|
if (!table)
|
|
|
|
table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");
|
|
|
|
|
|
|
|
table->words=NULL;
|
|
|
|
table->size =0;
|
|
|
|
table->outseqcount=0;
|
|
|
|
table->inseqcount=0;
|
|
|
|
table->strictcount =0;
|
|
|
|
|
|
|
|
if (seq)
|
|
|
|
{
|
|
|
|
table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize);
|
|
|
|
table->size = ecoCompactHashSequence(table->words,buffsize);
|
|
|
|
|
|
|
|
table->inseqcount=1;
|
|
|
|
table->strictcount =ECOMALLOC((table->size*sizeof(uint32_t)),
|
|
|
|
"Cannot allocate memory for word count table"
|
|
|
|
);
|
|
|
|
|
|
|
|
for (i=0; i < table->size; i++) table->strictcount[i]=1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return table;
|
|
|
|
}
|
|
|
|
|
|
|
|
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq)
|
|
|
|
{
|
|
|
|
uint32_t buffersize;
|
|
|
|
pword_t newtable;
|
|
|
|
uint32_t newsize;
|
|
|
|
uint32_t i;
|
|
|
|
|
|
|
|
buffersize = table->size + ecoWordCount(wordsize,circular,seq);
|
|
|
|
|
|
|
|
table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t),
|
|
|
|
"Cannot allocate memory to extend word table");
|
|
|
|
|
|
|
|
|
|
|
|
newtable = table->words + table->size;
|
|
|
|
|
|
|
|
// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
|
|
|
|
|
|
|
|
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize);
|
|
|
|
// DEBUG_LOG("new seq wordCount : %d",newsize);
|
|
|
|
|
|
|
|
newsize = ecoCompactHashSequence(newtable,newsize);
|
|
|
|
|
|
|
|
// DEBUG_LOG("compacted wordCount : %d",newsize);
|
|
|
|
buffersize = table->size + newsize;
|
|
|
|
|
|
|
|
// resize the count buffer
|
|
|
|
|
|
|
|
table->inseqcount++;
|
|
|
|
|
|
|
|
|
|
|
|
table->strictcount = ECOREALLOC(table->strictcount,buffersize*sizeof(uint32_t),
|
|
|
|
"Cannot allocate memory to extend example word count table");
|
|
|
|
|
|
|
|
for (i=table->size; i < buffersize; i++)
|
|
|
|
table->strictcount[i]=1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Now we have to merge in situ the two tables
|
|
|
|
|
|
|
|
ecomerge(table,table->size,newsize,exampleCount - table->inseqcount,seqQuorum);
|
|
|
|
// DEBUG_LOG("Dictionnary size : %d",table->size);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
|
|
|
|
uint32_t exampleCount,poptions_t options)
|
|
|
|
{
|
|
|
|
uint32_t i;
|
2009-04-23 11:38:52 +00:00
|
|
|
bool_t first=TRUE;
|
2009-03-04 22:32:55 +00:00
|
|
|
pwordcount_t strictprimers=NULL;
|
|
|
|
uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);
|
|
|
|
|
|
|
|
fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",sequenceQuorum,exampleCount);
|
|
|
|
|
|
|
|
strictprimers = initCountTable(NULL,options->primer_length,
|
|
|
|
options->circular,
|
|
|
|
options->doublestrand,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
|
|
|
|
for (i=0;i<seqdbsize;i++)
|
|
|
|
{
|
|
|
|
if (database[i]->isexample)
|
|
|
|
{
|
2009-04-23 11:38:52 +00:00
|
|
|
if (first)
|
|
|
|
{
|
|
|
|
strictprimers = initCountTable(strictprimers,options->primer_length,
|
|
|
|
options->circular,
|
|
|
|
options->doublestrand,
|
|
|
|
database[i]);
|
|
|
|
first=FALSE;
|
|
|
|
}
|
|
|
|
else
|
2009-03-04 22:32:55 +00:00
|
|
|
{
|
|
|
|
uint32_t s;
|
|
|
|
s = strictprimers->size;
|
|
|
|
// DEBUG_LOG("stack size : %u",s);
|
|
|
|
addSeqToWordCountTable(strictprimers,options->primer_length,
|
|
|
|
options->circular,
|
|
|
|
options->doublestrand,
|
|
|
|
exampleCount,
|
|
|
|
sequenceQuorum,
|
|
|
|
database[i]);
|
2009-04-23 11:38:52 +00:00
|
|
|
};
|
2009-03-04 22:32:55 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
else
|
|
|
|
strictprimers->outseqcount++;
|
|
|
|
|
|
|
|
fprintf(stderr," Indexed sequences %5d/%5d : considered words %-10d \r",(int32_t)i+1,(int32_t)seqdbsize,strictprimers->size);
|
|
|
|
|
|
|
|
// DEBUG_LOG("First word : %s ==> %d",ecoUnhashWord(strictprimers->words[0],18),strictprimers->incount[0])
|
|
|
|
// DEBUG_LOG("Second word : %s ==> %d",ecoUnhashWord(strictprimers->words[1],18),strictprimers->incount[1])
|
|
|
|
}
|
|
|
|
|
|
|
|
strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
|
|
|
|
sizeof(uint32_t)*strictprimers->size,
|
|
|
|
"Cannot reallocate strict primer count table");
|
|
|
|
strictprimers->words = ECOREALLOC(strictprimers->words,
|
|
|
|
sizeof(word_t)*strictprimers->size,
|
|
|
|
"Cannot reallocate strict primer table");
|
|
|
|
|
|
|
|
return strictprimers;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers)
|
|
|
|
{
|
|
|
|
uint32_t i;
|
|
|
|
uint32_t w;
|
|
|
|
|
|
|
|
for (i=0,w=0;i < strictprimers->size;i++)
|
|
|
|
{
|
|
|
|
if (w < i)
|
|
|
|
{
|
|
|
|
strictprimers->words[w]=strictprimers->words[i];
|
|
|
|
strictprimers->strictcount[w]=strictprimers->strictcount[i];
|
|
|
|
}
|
|
|
|
if (! ISMULTIWORD(strictprimers->words[w]))
|
|
|
|
w++;
|
|
|
|
}
|
|
|
|
|
|
|
|
strictprimers->size=w;
|
|
|
|
strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
|
|
|
|
sizeof(uint32_t)*strictprimers->size,
|
|
|
|
"Cannot reallocate strict primer count table");
|
|
|
|
strictprimers->words = ECOREALLOC(strictprimers->words,
|
|
|
|
sizeof(word_t)*strictprimers->size,
|
|
|
|
"Cannot reallocate strict primer table");
|
|
|
|
|
|
|
|
return w;
|
|
|
|
}
|