Files
ecoprimers/src/libecoprimer/strictprimers.c
2009-04-23 11:38:52 +00:00

175 lines
5.2 KiB
C

/*
* strictprimers.c
*
* Created on: 7 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
#include <string.h>
#include <math.h>
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq)
{
uint32_t i;
uint32_t buffsize;
//wordcount_t t;
if (!table)
table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");
table->words=NULL;
table->size =0;
table->outseqcount=0;
table->inseqcount=0;
table->strictcount =0;
if (seq)
{
table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize);
table->size = ecoCompactHashSequence(table->words,buffsize);
table->inseqcount=1;
table->strictcount =ECOMALLOC((table->size*sizeof(uint32_t)),
"Cannot allocate memory for word count table"
);
for (i=0; i < table->size; i++) table->strictcount[i]=1;
}
return table;
}
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq)
{
uint32_t buffersize;
pword_t newtable;
uint32_t newsize;
uint32_t i;
buffersize = table->size + ecoWordCount(wordsize,circular,seq);
table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t),
"Cannot allocate memory to extend word table");
newtable = table->words + table->size;
// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize);
// DEBUG_LOG("new seq wordCount : %d",newsize);
newsize = ecoCompactHashSequence(newtable,newsize);
// DEBUG_LOG("compacted wordCount : %d",newsize);
buffersize = table->size + newsize;
// resize the count buffer
table->inseqcount++;
table->strictcount = ECOREALLOC(table->strictcount,buffersize*sizeof(uint32_t),
"Cannot allocate memory to extend example word count table");
for (i=table->size; i < buffersize; i++)
table->strictcount[i]=1;
// Now we have to merge in situ the two tables
ecomerge(table,table->size,newsize,exampleCount - table->inseqcount,seqQuorum);
// DEBUG_LOG("Dictionnary size : %d",table->size);
}
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options)
{
uint32_t i;
bool_t first=TRUE;
pwordcount_t strictprimers=NULL;
uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);
fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",sequenceQuorum,exampleCount);
strictprimers = initCountTable(NULL,options->primer_length,
options->circular,
options->doublestrand,
NULL);
for (i=0;i<seqdbsize;i++)
{
if (database[i]->isexample)
{
if (first)
{
strictprimers = initCountTable(strictprimers,options->primer_length,
options->circular,
options->doublestrand,
database[i]);
first=FALSE;
}
else
{
uint32_t s;
s = strictprimers->size;
// DEBUG_LOG("stack size : %u",s);
addSeqToWordCountTable(strictprimers,options->primer_length,
options->circular,
options->doublestrand,
exampleCount,
sequenceQuorum,
database[i]);
};
}
else
strictprimers->outseqcount++;
fprintf(stderr," Indexed sequences %5d/%5d : considered words %-10d \r",(int32_t)i+1,(int32_t)seqdbsize,strictprimers->size);
// DEBUG_LOG("First word : %s ==> %d",ecoUnhashWord(strictprimers->words[0],18),strictprimers->incount[0])
// DEBUG_LOG("Second word : %s ==> %d",ecoUnhashWord(strictprimers->words[1],18),strictprimers->incount[1])
}
strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
sizeof(uint32_t)*strictprimers->size,
"Cannot reallocate strict primer count table");
strictprimers->words = ECOREALLOC(strictprimers->words,
sizeof(word_t)*strictprimers->size,
"Cannot reallocate strict primer table");
return strictprimers;
}
uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers)
{
uint32_t i;
uint32_t w;
for (i=0,w=0;i < strictprimers->size;i++)
{
if (w < i)
{
strictprimers->words[w]=strictprimers->words[i];
strictprimers->strictcount[w]=strictprimers->strictcount[i];
}
if (! ISMULTIWORD(strictprimers->words[w]))
w++;
}
strictprimers->size=w;
strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
sizeof(uint32_t)*strictprimers->size,
"Cannot reallocate strict primer count table");
strictprimers->words = ECOREALLOC(strictprimers->words,
sizeof(word_t)*strictprimers->size,
"Cannot reallocate strict primer table");
return w;
}