New version 0.3 with filtering on short words

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@213 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-05-13 06:51:25 +00:00
parent 5dc55c7f53
commit b7c1640042
12 changed files with 330 additions and 34 deletions
--- a/2
+++ b/2
@ -1 +1 @@
-0.2
+0.3
--- a/src/ecoPrimer
+++ b/src/ecoPrimer
--- a/src/ecoprimer.c
+++ b/src/ecoprimer.c
@ -14,7 +14,7 @@
 #include <time.h>
 #include <sys/time.h>
-#define VERSION "0.2"
+#define VERSION "0.3"
  /* TR: by default, statistics are made on species level*/
 #define DEFAULTTAXONRANK "species"
@ -98,6 +98,7 @@ static void ExitUsage(int stat)
 void initoptions(poptions_t options)
 {
 	options->statistics=FALSE;
 	options->filtering=TRUE;
 	options->lmin=0;                   //< Amplifia minimal length
 	options->lmax=1000;                   //< Amplifia maximal length
 	options->error_max=3;              //**< maximum error count in fuzzy search
@ -432,6 +433,7 @@ int main(int argc, char **argv)
 	uint32_t        i;
 	pwordcount_t    words;
 //	pwordcount_t    words2;
 	pprimercount_t  primers;
 	ppairtree_t		pairs;
@ -442,7 +444,7 @@ int main(int argc, char **argv)
 	initoptions(&options);
-    while ((carg = getopt(argc, argv, "hvcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) {
+    while ((carg = getopt(argc, argv, "hfvcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) {
     switch (carg) {
 								 /* ---------------------------- */
@ -451,6 +453,12 @@ int main(int argc, char **argv)
 			options.statistics=TRUE;
 			break;
 			 /* ---------------------------- */
 		 case 'f':               /* set in single strand mode    */
 			 /* ---------------------------- */
 			 options.filtering=FALSE;
 			 break;
                                /* -------------------- */
        case 'd':               /* database name        */
                                /* -------------------- */
@ -599,12 +607,20 @@ int main(int argc, char **argv)
    fprintf(stderr,"\nIndexing words in sequences\n");
    printcurrenttimeinmilli();
    words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
    printcurrenttimeinmilli();
    fprintf(stderr,"\n  Strict primer count : %d\n",words->size);
 //    options.filtering=FALSE;
 //    words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
 //    fprintf(stderr,"\n  Strict primer count : %d\n",words2->size);
 //
 //    fprintf(stderr,"\n\n  Primer sample : \n");
 //    for (i=0; i<words->size; i++)
 //    	fprintf(stderr,"  + Primer : %s   sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
 //    fprintf(stderr,"\n\n  Primer sample : \n");
 //    for (i=0; i<words2->size; i++)
 //    	fprintf(stderr,"  + Primer : %s   sequence count : %d\n",ecoUnhashWord(words2->words[i],options.primer_length),words2->strictcount[i]);
    if (options.no_multi_match)
    {
    	(void)filterMultiStrictPrimer(words);
--- a/src/libecoPCR/ecoMalloc.c
+++ b/src/libecoPCR/ecoMalloc.c
@ -17,7 +17,7 @@ void    eco_untrace_memory_allocation()
 void ecoMallocedMemory()
 {
-	return eco_amount_malloc;
+	//eco_amount_malloc;
 }
 void   *eco_malloc(int64_t chunksize,
@ -60,7 +60,7 @@ void   *eco_realloc(void *chunk,
 	if (!newchunk)
           {
 		ecoError(ECO_MEM_ERROR,error_message,filename,line);
-                fprintf('Requested memory : %d\n',newsize);
+                fprintf(stderr,"Requested memory : %d\n",newsize);
           }
 	if (!chunk)
 		eco_chunk_malloc++;
--- a/src/libecoprimer/Makefile
+++ b/src/libecoprimer/Makefile
@ -13,7 +13,8 @@ SOURCES = goodtaxon.c \
          pairtree.c \
          pairs.c \
          taxstats.c \
-          apat_search.c
+          apat_search.c \
 		  filtering.c
 SRCS=$(SOURCES)
--- a/src/libecoprimer/ecoprimer.h
+++ b/src/libecoprimer/ecoprimer.h
@ -47,6 +47,7 @@ typedef uint64_t word_t, *pword_t;
 #define WORDMASK(s)       ((1LLU << ((s) * 2)) -1)
 #define LSHIFTWORD(x,s)    (((x) << 2) & WORDMASK(s))
 #define RSHIFTWORD(x,s)    (((x) & WORDMASK(s))>> 2)
 #define ERRORMASK(s)       ((int32_t)((1LLU << (s)) -1))
 #define RAPPENDBASE(x,s,c) (LSHIFTWORD((x),(s)) | (word_t)(c))
 #define LAPPENDBASE(x,s,c) (RSHIFTWORD((x),(s)) | ((word_t)((~(c)) & 3) << (((s)-1) *2)))
@ -65,6 +66,13 @@ typedef uint64_t word_t, *pword_t;
 #define MINI(x,y) (((x) < (y)) ? (x):(y))
 #define MAXI(x,y) (((x) < (y)) ? (y):(x))
 #define FWORDSIZE (13)
 #define FWORDMASK WORDMASK(FWORDSIZE)
 #define FILTERWORD(x) ((uint32_t)((x) & FWORDMASK))
 #define CFILTERWORD(x,s) ((uint32_t)(((x) >> (((s)-FWORDSIZE)*2)) & FWORDMASK))
 typedef struct {
 	pword_t    words;
 	uint32_t    *strictcount;
@ -231,6 +239,7 @@ typedef struct {
 typedef struct {
 	bool_t         statistics;
 	bool_t         filtering;
 	uint32_t        lmin;                   //**< Amplifia minimal length
 	uint32_t        lmax;                   //**< Amplifia maximal length
 	uint32_t        error_max;              //**< maximum error count in fuzzy search
@ -270,7 +279,8 @@ pecodnadb_t readdnadb(const char *name, uint32_t *size);
 int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
 uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq);
-pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size);
+pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size,int32_t *neededWords,uint32_t neededWordCount,
 	    int32_t quorum);
 uint32_t ecoCompactHashSequence(pword_t dest,uint32_t size);
 const char* ecoUnhashWord(word_t word,uint32_t size);
 word_t ecoComplementWord(word_t word,uint32_t size);
@ -278,8 +288,8 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word);
 void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum);
-pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq);
+pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount);
-void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq);
+void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount);
 pqueue_t newQueue(pqueue_t queue, uint32_t size);
 pqueue_t resizeQueue(pqueue_t queue, uint32_t size);
@ -318,4 +328,8 @@ float taxonomycoverage(ppair_t pair, poptions_t options);
 char ecoComplementChar(char base);
 void taxonomyspecificity (ppair_t pair);
 int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
 					 uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t  sequenceQuorum);
 #endif /* EPSORT_H_ */
--- a/src/libecoprimer/filtering.c
+++ b/src/libecoprimer/filtering.c
@ -0,0 +1,183 @@
 /*
 * filtering.c
 *
 *  Created on: 12 mai 2009
 *      Author: coissac
 */
 #include "ecoprimer.h"
 #include <string.h>
 #include <math.h>
 #include "hashencoder.h"
 static int32_t *ecoFilteringHashSequence(int32_t *dest,
 										 uint32_t circular,
 										 uint32_t doublestrand,
 										 ecoseq_t *seq,
 										 uint32_t *size);
 static	int32_t *ecoFilteringHashSequence(int32_t *dest,
 										  uint32_t circular,
 										  uint32_t doublestrand,
 										  ecoseq_t *seq,
 										  uint32_t *size)
 {
 	static char    *in_last_seq=NULL;
 	uint32_t i=0;
 	uint32_t j;
 	char *base;
 	int8_t code;
 	int32_t error=0;
 	word_t word=0;
 	word_t antiword=0;
 	uint32_t goodword;
 	uint32_t lmax=0;
 	// run on the first call;
 	if (dest==(void*)-1)
 	{
 		if (in_last_seq) ECOFREE(in_last_seq,"Free in last seq table");
 		return NULL;
 	}
 	*size = pow(4,FWORDSIZE);
 	if (!in_last_seq)
 		in_last_seq = ECOMALLOC(*size*sizeof(char),
 								"Cannot allocate filtering hash table");
 	memset(in_last_seq,0,*size*sizeof(char));
 	if (!dest)
 	{
 		dest = ECOMALLOC(*size*sizeof(int32_t),
 				               "Cannot allocate filtering hash table");
 		memset(dest,0,*size*sizeof(int32_t));
 	}
 	lmax = seq->SQ_length;
 	if (!circular)
       lmax-= FWORDSIZE-1;
 //	DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i));
 	for (i=0, base = seq->SQ; i < FWORDSIZE && i < lmax; i++,base++)
 	{
 		error<<= 1;
 		error&=ERRORMASK(FWORDSIZE);
 		code = encoder[(*base) - 'A'];
 		if (code <0)
 		{
 			code = 0;
 			error|= 1;
 		}
 		word=RAPPENDBASE(word,FWORDSIZE,code);
 		if (doublestrand)
 			antiword=LAPPENDBASE(antiword,FWORDSIZE,code);
 	}
 	if (!error && i==FWORDSIZE)
 	{
 		goodword=(uint32_t)((doublestrand) ? MINI(word,antiword):word);
 		if (!in_last_seq[goodword])
 		{
 			in_last_seq[goodword]=1;
 			dest[goodword]++;
 		}
 	}
 	for (j=1; j < lmax; j++,i++,base++)
 	{
 //		DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
 							/* roll over the sequence for circular ones */
 		if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
 		error<<= 1;
 		error&=ERRORMASK(FWORDSIZE);
 		code = encoder[(*base) - 'A'];
 		if (code <0)
 		{
 			code = 0;
 			error|= 1;
 		}
 		word=RAPPENDBASE(word,FWORDSIZE,code);
 		if (doublestrand)
 			antiword=LAPPENDBASE(antiword,FWORDSIZE,code);
 		if (!error)
 		{
 			goodword=(uint32_t)((doublestrand) ? MINI(word,antiword):word);
 			if (!in_last_seq[goodword])
 			{
 				in_last_seq[goodword]=1;
 				dest[goodword]++;
 			}
 		}
 	}
 	return dest;
 }
 int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
 					 uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t  sequenceQuorum)
 {
 	int32_t *wordscount=NULL;
 	int32_t keep=0;
 	uint32_t i,j=0;
 	for (i=0;i<seqdbsize;i++)
    {
    	if (database[i]->isexample)
    	{
    		j++;
    		wordscount=ecoFilteringHashSequence(wordscount,
 									 options->circular,
                                     options->doublestrand,
                                     database[i],
                                     size);
    	}
    	fprintf(stderr,"  Filtered sequences %5u/%5u          \r",j,exampleCount);
    }
 	fprintf(stderr,"\n");
 	for (i=0;i<*size;i++)
 		if (wordscount[i] >= sequenceQuorum)
 			keep++;
 	(void)ecoFilteringHashSequence((int32_t*)-1,
 									options->circular,
 	                                options->doublestrand,
 	                                NULL,
 	                                NULL);
 	fprintf(stderr,"ok\n Considered word of size %d for filtering : %d\n",FWORDSIZE,keep);
 	return wordscount;
 }
--- a/src/libecoprimer/hashencoder.h
+++ b/src/libecoprimer/hashencoder.h
@ -0,0 +1,21 @@
 /*
 * hashencoder.h
 *
 *  Created on: 12 mai 2009
 *      Author: coissac
 */
 #ifndef HASHENCODER_H_
 #define HASHENCODER_H_
 static int8_t encoder[] = {0,                                            // A
 		                   -1,                                           // b
 		                   1,                                            // C
 	       	               -1,-1,-1,                                     // d, e, f
 		                   2,                                            // G
 		                   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,          // h,i,j,k,l,m,n,o,p,q,r,s
 		                   3,3,                                           // T,U
 		                   -1,-1,-1,-1,-1};                              // v,w,x,y,z
 #endif /* HASHENCODER_H_ */
--- a/src/libecoprimer/hashsequence.c
+++ b/src/libecoprimer/hashsequence.c
@ -10,15 +10,7 @@
 static int cmpword(const void *x,const void *y);
-static int8_t encoder[] = {0,                                            // A
+#include "hashencoder.h"
 		                   -1,                                           // b
 		                   1,                                            // C
 	       	               -1,-1,-1,                                     // d, e, f
 		                   2,                                            // G
 		                   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,          // h,i,j,k,l,m,n,o,p,q,r,s
 		                   3,3,                                           // T,U
 		                   -1,-1,-1,-1,-1};                              // v,w,x,y,z
 uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
 {
@ -31,7 +23,15 @@ uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
 	return wordcount;
 }
-pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size)
+pword_t ecoHashSequence(pword_t dest,
 					    uint32_t wordsize,
 					    uint32_t circular,
 					    uint32_t doublestrand,
 					    ecoseq_t *seq,
 					    uint32_t *size,
 					    int32_t  *neededWords,
 					    uint32_t neededWordCount,
 					    int32_t quorum)
 {
 	uint32_t i=0;
 	uint32_t j;
@ -40,6 +40,7 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
 	int32_t error=0;
 	word_t word=0;
 	word_t antiword=0;
 	word_t goodword;
 	uint32_t lmax=0;
 	(*size)=0;
@ -57,7 +58,9 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
 	for (i=0, base = seq->SQ; i < wordsize && i < lmax; i++,base++)
 	{
 		error<<= 1;
 		error&=ERRORMASK(wordsize);
 		code = encoder[(*base) - 'A'];
 		if (code <0)
@ -68,10 +71,22 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
 		word=RAPPENDBASE(word,wordsize,code);
 		if (doublestrand)
 			antiword=LAPPENDBASE(antiword,wordsize,code);
 		if (neededWordCount && i>=(FWORDSIZE-1))
 		{
 			goodword = (doublestrand) ? MINI(FILTERWORD(word),CFILTERWORD(antiword,wordsize)):FILTERWORD(word);
 			if (neededWords[(uint32_t)goodword]<quorum)
 				error|= (1 << (FWORDSIZE-1));
 		}
 	}
 	if (!error && i==wordsize)
 	{
 		dest[*size]=(doublestrand) ? MINI(word,antiword):word;
@ -85,9 +100,11 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
 //		DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
 							/* roll over the sequence for circular ones */
 		if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
 		error<<= 1;
 		error&=ERRORMASK(wordsize);
 		code = encoder[(*base) - 'A'];
 		if (code <0)
@ -100,6 +117,17 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
 		if (doublestrand)
 			antiword=LAPPENDBASE(antiword,wordsize,code);
 		if (neededWordCount)
 		{
 			goodword = (doublestrand) ? MINI(FILTERWORD(word),CFILTERWORD(antiword,wordsize)):FILTERWORD(word);
 			if (neededWords[(uint32_t)goodword]<quorum)
 				error|= (1 << (FWORDSIZE-1));
 //			else
 //				DEBUG_LOG("%s goodword = %p %d/%d (pos:%d error:%d)",seq->AC,goodword,neededWords[(uint32_t)goodword],quorum,i,error);
 		}
 		if (!error)
 		{
 			dest[*size]=(doublestrand) ? MINI(word,antiword):word;
@ -107,7 +135,7 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
 		}
 	}
-
+	// DEBUG_LOG("%s goodword = %d",seq->AC,*size);
 	return dest;
 }
@ -116,12 +144,16 @@ uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
 {
 	uint32_t i,j;
 	word_t  current;
 //	bool_t here=FALSE;
 	sortword(table,size);
 	current = 0;
 	current=SETMULTIWORD(current);   /* build impossible word for the first loop cycle */
 //	if (strcmp(ecoUnhashWord(table[size-1],18),"GTTTGTTCAACGATTAAA")==0)
 //		here=TRUE;
 	for (i=0,j=0; j < size;j++)
 	{
 		if (WORD(table[j])!=current)
@ -134,6 +166,9 @@ uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
 			table[i]=SETMULTIWORD(table[i]);
 	}
 //		if (strcmp(ecoUnhashWord(WORD(table[i-1]),18),"TACGACCTCGATGTTGGA")==0)
 //			DEBUG_LOG("winner %d",i)
 	return i;
 }
--- a/src/libecoprimer/merge.c
+++ b/src/libecoprimer/merge.c
@ -41,7 +41,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
 	(void)mergeInit(&merged,data,s1,s2);
 	(void)newQueue(&queue,MINI(s1,s2));
-	while (merged.read1 < s1 && merged.read2 < merged.size)
+
 	while (merged.read1 < s1 || merged.read2 < merged.size)
 	{
 		if (! queue.empty)
 		{
@ -56,7 +57,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
 			source=S1;
 		}
-		if (WORD(currentword) > WORD(merged.words[merged.read2]))
+		if (merged.read2 < merged.size &&
 				WORD(currentword) > WORD(merged.words[merged.read2]))
 		{
 			currentword  = merged.words[merged.read2];
 			currentcount = merged.count[merged.read2];
@ -114,6 +116,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
 	if (merged.read2 < merged.size)
 		{
 			//DEBUG_LOG("end1 %d %d/%d  %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
 		for (;merged.read2 < merged.size;merged.read2++)
 		{
 			merged.words[merged.write]=merged.words[merged.read2];
@ -122,7 +126,10 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
 	        	merged.write++;
 		}
-	else while (! queue.empty)
+		}
 	else {
 		//DEBUG_LOG("end2 %d %d/%d  %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
 		while (! queue.empty)
 		{
 //			DEBUG_LOG("write : %s count : %d write : %d size : %d pop : %d push : %d  empty : %d",ecoUnhashWord(queue.words[queue.pop],18),queue.count[queue.pop],merged.write,queue.size,queue.pop,queue.push,queue.empty)
 			merged.words[merged.write]=queue.words[queue.pop];
@ -131,6 +138,7 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
 	        if (remainingSeq + merged.count[merged.write] >= seqQuorum)
 	        	merged.write++;
 		}
 		}
 	data->size = merged.write;
--- a/src/libecoprimer/strictprimers.c
+++ b/src/libecoprimer/strictprimers.c
@ -48,7 +48,7 @@ double timeval_subtract (struct timeval *x, struct timeval *y)
   return (double)result.tv_sec + (double)result.tv_usec/1e6;
 }
-pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq)
+pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount)
 {
 	uint32_t i;
 	uint32_t buffsize;
@ -65,7 +65,7 @@ pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
 	if (seq)
 	{
-		table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize);
+		table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize,neededWords,neededWordCount,seqQuorum);
 		table->size  = ecoCompactHashSequence(table->words,buffsize);
 		table->inseqcount=1;
@ -79,7 +79,7 @@ pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
 	return table;
 }
-void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq)
+void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount)
 {
 	uint32_t buffersize;
 	pword_t newtable;
@ -96,7 +96,7 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
 //	DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
-	(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize);
+	(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize,neededWords,neededWordCount,seqQuorum);
 //	DEBUG_LOG("new seq wordCount : %d",newsize);
 	newsize = ecoCompactHashSequence(newtable,newsize);
@ -137,6 +137,18 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
 	pwordcount_t strictprimers=NULL;
 	uint64_t totallength=0;
 	uint32_t  sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);
 	int32_t *neededWords;
 	uint32_t neededWordCount;
 	fprintf(stderr,"Filtering... ");
 	if (options->filtering)
 		neededWords = filteringSeq(database,seqdbsize,exampleCount,options,&neededWordCount,(int32_t)sequenceQuorum);
 	else
 	{
 		neededWordCount=0;
 		neededWords=NULL;
 	}
 	if (options->statistics)
 	{
@ -152,7 +164,8 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
 	strictprimers = initCountTable(NULL,options->primer_length,
                                                 options->circular,
                                                 options->doublestrand,
-                                   NULL);
+                                                 0,
                                   NULL,NULL,0);
 	getrusage(RUSAGE_SELF,&start);
@ -167,7 +180,8 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
    			strictprimers = initCountTable(strictprimers,options->primer_length,
                                                             options->circular,
                                                             options->doublestrand,
-                                               database[i]);
+                                                             sequenceQuorum,
                                               database[i],neededWords,neededWordCount);
    			first=FALSE;
    		}
    		else
@ -180,7 +194,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
    					                             options->doublestrand,
    					                             exampleCount,
    					                             sequenceQuorum,
-    					               database[i]);
+    					               database[i],neededWords,neededWordCount);
    		};
    		totallength+=database[i]->SQ_length;
    		getrusage(RUSAGE_SELF,&usage);
@ -215,6 +229,9 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
 										sizeof(word_t)*strictprimers->size,
 										"Cannot reallocate strict primer table");
    if (neededWords)
    	ECOFREE(neededWords,"Clean needed word table");
 	return strictprimers;
 }
--- a/src/libecoprimer/taxstats.c
+++ b/src/libecoprimer/taxstats.c
@ -47,7 +47,6 @@ int32_t counttaxon(int32_t taxid)
 		tsearch((void*)((size_t)taxid),&taxontree,cmptaxon);
 		taxoncount++;
 	}
 	return taxoncount;
 }
@ -60,6 +59,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
 	ecotx_t  *tmptaxon;
 	counttaxon(-1);
 	options->intaxa = 0;
    for (i=0;i<seqdbsize;i++)
 	{
@ -85,6 +85,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
 	}
 	counttaxon(-1);
 	options->outtaxa = 0;
    for (i=0;i<seqdbsize;i++)
 		{