/* * strictprimers.c * * Created on: 7 nov. 2008 * Author: coissac */ #include "ecoprimer.h" #include #include pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq) { uint32_t i; uint32_t buffsize; //wordcount_t t; if (!table) table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure"); table->words=NULL; table->size =0; table->outseqcount=0; table->inseqcount=0; table->strictcount =0; if (seq) { table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize); table->size = ecoCompactHashSequence(table->words,buffsize); table->inseqcount=1; table->strictcount =ECOMALLOC((table->size*sizeof(uint32_t)), "Cannot allocate memory for word count table" ); for (i=0; i < table->size; i++) table->strictcount[i]=1; } return table; } void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq) { uint32_t buffersize; pword_t newtable; uint32_t newsize; uint32_t i; buffersize = table->size + ecoWordCount(wordsize,circular,seq); table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t), "Cannot allocate memory to extend word table"); newtable = table->words + table->size; // DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable); (void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize); // DEBUG_LOG("new seq wordCount : %d",newsize); newsize = ecoCompactHashSequence(newtable,newsize); // DEBUG_LOG("compacted wordCount : %d",newsize); buffersize = table->size + newsize; // resize the count buffer table->inseqcount++; table->strictcount = ECOREALLOC(table->strictcount,buffersize*sizeof(uint32_t), "Cannot allocate memory to extend example word count table"); for (i=table->size; i < buffersize; i++) table->strictcount[i]=1; // Now we have to merge in situ the two tables ecomerge(table,table->size,newsize,exampleCount - table->inseqcount,seqQuorum); // DEBUG_LOG("Dictionnary size : %d",table->size); } pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize, uint32_t exampleCount,poptions_t options) { uint32_t i; bool_t first=TRUE; pwordcount_t strictprimers=NULL; uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum); fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",sequenceQuorum,exampleCount); strictprimers = initCountTable(NULL,options->primer_length, options->circular, options->doublestrand, NULL); for (i=0;iisexample) { if (first) { strictprimers = initCountTable(strictprimers,options->primer_length, options->circular, options->doublestrand, database[i]); first=FALSE; } else { uint32_t s; s = strictprimers->size; // DEBUG_LOG("stack size : %u",s); addSeqToWordCountTable(strictprimers,options->primer_length, options->circular, options->doublestrand, exampleCount, sequenceQuorum, database[i]); }; } else strictprimers->outseqcount++; fprintf(stderr," Indexed sequences %5d/%5d : considered words %-10d \r",(int32_t)i+1,(int32_t)seqdbsize,strictprimers->size); // DEBUG_LOG("First word : %s ==> %d",ecoUnhashWord(strictprimers->words[0],18),strictprimers->incount[0]) // DEBUG_LOG("Second word : %s ==> %d",ecoUnhashWord(strictprimers->words[1],18),strictprimers->incount[1]) } strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount, sizeof(uint32_t)*strictprimers->size, "Cannot reallocate strict primer count table"); strictprimers->words = ECOREALLOC(strictprimers->words, sizeof(word_t)*strictprimers->size, "Cannot reallocate strict primer table"); return strictprimers; } uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers) { uint32_t i; uint32_t w; for (i=0,w=0;i < strictprimers->size;i++) { if (w < i) { strictprimers->words[w]=strictprimers->words[i]; strictprimers->strictcount[w]=strictprimers->strictcount[i]; } if (! ISMULTIWORD(strictprimers->words[w])) w++; } strictprimers->size=w; strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount, sizeof(uint32_t)*strictprimers->size, "Cannot reallocate strict primer count table"); strictprimers->words = ECOREALLOC(strictprimers->words, sizeof(word_t)*strictprimers->size, "Cannot reallocate strict primer table"); return w; }