New version 0.3 with filtering on short words

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@213 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2009-05-13 06:51:25 +00:00
parent 5dc55c7f53
commit b7c1640042
12 changed files with 330 additions and 34 deletions

View File

@ -1 +1 @@
0.2
0.3

BIN
src/ecoPrimer Executable file

Binary file not shown.

View File

@ -14,7 +14,7 @@
#include <time.h>
#include <sys/time.h>
#define VERSION "0.2"
#define VERSION "0.3"
/* TR: by default, statistics are made on species level*/
#define DEFAULTTAXONRANK "species"
@ -98,6 +98,7 @@ static void ExitUsage(int stat)
void initoptions(poptions_t options)
{
options->statistics=FALSE;
options->filtering=TRUE;
options->lmin=0; //< Amplifia minimal length
options->lmax=1000; //< Amplifia maximal length
options->error_max=3; //**< maximum error count in fuzzy search
@ -432,6 +433,7 @@ int main(int argc, char **argv)
uint32_t i;
pwordcount_t words;
// pwordcount_t words2;
pprimercount_t primers;
ppairtree_t pairs;
@ -442,7 +444,7 @@ int main(int argc, char **argv)
initoptions(&options);
while ((carg = getopt(argc, argv, "hvcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) {
while ((carg = getopt(argc, argv, "hfvcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) {
switch (carg) {
/* ---------------------------- */
@ -451,6 +453,12 @@ int main(int argc, char **argv)
options.statistics=TRUE;
break;
/* ---------------------------- */
case 'f': /* set in single strand mode */
/* ---------------------------- */
options.filtering=FALSE;
break;
/* -------------------- */
case 'd': /* database name */
/* -------------------- */
@ -599,12 +607,20 @@ int main(int argc, char **argv)
fprintf(stderr,"\nIndexing words in sequences\n");
printcurrenttimeinmilli();
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
printcurrenttimeinmilli();
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
// options.filtering=FALSE;
// words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
// fprintf(stderr,"\n Strict primer count : %d\n",words2->size);
//
// fprintf(stderr,"\n\n Primer sample : \n");
// for (i=0; i<words->size; i++)
// fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
// fprintf(stderr,"\n\n Primer sample : \n");
// for (i=0; i<words2->size; i++)
// fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words2->words[i],options.primer_length),words2->strictcount[i]);
if (options.no_multi_match)
{
(void)filterMultiStrictPrimer(words);

View File

@ -17,7 +17,7 @@ void eco_untrace_memory_allocation()
void ecoMallocedMemory()
{
return eco_amount_malloc;
//eco_amount_malloc;
}
void *eco_malloc(int64_t chunksize,
@ -60,7 +60,7 @@ void *eco_realloc(void *chunk,
if (!newchunk)
{
ecoError(ECO_MEM_ERROR,error_message,filename,line);
fprintf('Requested memory : %d\n',newsize);
fprintf(stderr,"Requested memory : %d\n",newsize);
}
if (!chunk)
eco_chunk_malloc++;

View File

@ -13,7 +13,8 @@ SOURCES = goodtaxon.c \
pairtree.c \
pairs.c \
taxstats.c \
apat_search.c
apat_search.c \
filtering.c
SRCS=$(SOURCES)

View File

@ -47,6 +47,7 @@ typedef uint64_t word_t, *pword_t;
#define WORDMASK(s) ((1LLU << ((s) * 2)) -1)
#define LSHIFTWORD(x,s) (((x) << 2) & WORDMASK(s))
#define RSHIFTWORD(x,s) (((x) & WORDMASK(s))>> 2)
#define ERRORMASK(s) ((int32_t)((1LLU << (s)) -1))
#define RAPPENDBASE(x,s,c) (LSHIFTWORD((x),(s)) | (word_t)(c))
#define LAPPENDBASE(x,s,c) (RSHIFTWORD((x),(s)) | ((word_t)((~(c)) & 3) << (((s)-1) *2)))
@ -65,6 +66,13 @@ typedef uint64_t word_t, *pword_t;
#define MINI(x,y) (((x) < (y)) ? (x):(y))
#define MAXI(x,y) (((x) < (y)) ? (y):(x))
#define FWORDSIZE (13)
#define FWORDMASK WORDMASK(FWORDSIZE)
#define FILTERWORD(x) ((uint32_t)((x) & FWORDMASK))
#define CFILTERWORD(x,s) ((uint32_t)(((x) >> (((s)-FWORDSIZE)*2)) & FWORDMASK))
typedef struct {
pword_t words;
uint32_t *strictcount;
@ -231,6 +239,7 @@ typedef struct {
typedef struct {
bool_t statistics;
bool_t filtering;
uint32_t lmin; //**< Amplifia minimal length
uint32_t lmax; //**< Amplifia maximal length
uint32_t error_max; //**< maximum error count in fuzzy search
@ -270,7 +279,8 @@ pecodnadb_t readdnadb(const char *name, uint32_t *size);
int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq);
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size);
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size,int32_t *neededWords,uint32_t neededWordCount,
int32_t quorum);
uint32_t ecoCompactHashSequence(pword_t dest,uint32_t size);
const char* ecoUnhashWord(word_t word,uint32_t size);
word_t ecoComplementWord(word_t word,uint32_t size);
@ -278,8 +288,8 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word);
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum);
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq);
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq);
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount);
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount);
pqueue_t newQueue(pqueue_t queue, uint32_t size);
pqueue_t resizeQueue(pqueue_t queue, uint32_t size);
@ -318,4 +328,8 @@ float taxonomycoverage(ppair_t pair, poptions_t options);
char ecoComplementChar(char base);
void taxonomyspecificity (ppair_t pair);
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum);
#endif /* EPSORT_H_ */

View File

@ -0,0 +1,183 @@
/*
* filtering.c
*
* Created on: 12 mai 2009
* Author: coissac
*/
#include "ecoprimer.h"
#include <string.h>
#include <math.h>
#include "hashencoder.h"
static int32_t *ecoFilteringHashSequence(int32_t *dest,
uint32_t circular,
uint32_t doublestrand,
ecoseq_t *seq,
uint32_t *size);
static int32_t *ecoFilteringHashSequence(int32_t *dest,
uint32_t circular,
uint32_t doublestrand,
ecoseq_t *seq,
uint32_t *size)
{
static char *in_last_seq=NULL;
uint32_t i=0;
uint32_t j;
char *base;
int8_t code;
int32_t error=0;
word_t word=0;
word_t antiword=0;
uint32_t goodword;
uint32_t lmax=0;
// run on the first call;
if (dest==(void*)-1)
{
if (in_last_seq) ECOFREE(in_last_seq,"Free in last seq table");
return NULL;
}
*size = pow(4,FWORDSIZE);
if (!in_last_seq)
in_last_seq = ECOMALLOC(*size*sizeof(char),
"Cannot allocate filtering hash table");
memset(in_last_seq,0,*size*sizeof(char));
if (!dest)
{
dest = ECOMALLOC(*size*sizeof(int32_t),
"Cannot allocate filtering hash table");
memset(dest,0,*size*sizeof(int32_t));
}
lmax = seq->SQ_length;
if (!circular)
lmax-= FWORDSIZE-1;
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i));
for (i=0, base = seq->SQ; i < FWORDSIZE && i < lmax; i++,base++)
{
error<<= 1;
error&=ERRORMASK(FWORDSIZE);
code = encoder[(*base) - 'A'];
if (code <0)
{
code = 0;
error|= 1;
}
word=RAPPENDBASE(word,FWORDSIZE,code);
if (doublestrand)
antiword=LAPPENDBASE(antiword,FWORDSIZE,code);
}
if (!error && i==FWORDSIZE)
{
goodword=(uint32_t)((doublestrand) ? MINI(word,antiword):word);
if (!in_last_seq[goodword])
{
in_last_seq[goodword]=1;
dest[goodword]++;
}
}
for (j=1; j < lmax; j++,i++,base++)
{
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
/* roll over the sequence for circular ones */
if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
error<<= 1;
error&=ERRORMASK(FWORDSIZE);
code = encoder[(*base) - 'A'];
if (code <0)
{
code = 0;
error|= 1;
}
word=RAPPENDBASE(word,FWORDSIZE,code);
if (doublestrand)
antiword=LAPPENDBASE(antiword,FWORDSIZE,code);
if (!error)
{
goodword=(uint32_t)((doublestrand) ? MINI(word,antiword):word);
if (!in_last_seq[goodword])
{
in_last_seq[goodword]=1;
dest[goodword]++;
}
}
}
return dest;
}
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum)
{
int32_t *wordscount=NULL;
int32_t keep=0;
uint32_t i,j=0;
for (i=0;i<seqdbsize;i++)
{
if (database[i]->isexample)
{
j++;
wordscount=ecoFilteringHashSequence(wordscount,
options->circular,
options->doublestrand,
database[i],
size);
}
fprintf(stderr," Filtered sequences %5u/%5u \r",j,exampleCount);
}
fprintf(stderr,"\n");
for (i=0;i<*size;i++)
if (wordscount[i] >= sequenceQuorum)
keep++;
(void)ecoFilteringHashSequence((int32_t*)-1,
options->circular,
options->doublestrand,
NULL,
NULL);
fprintf(stderr,"ok\n Considered word of size %d for filtering : %d\n",FWORDSIZE,keep);
return wordscount;
}

View File

@ -0,0 +1,21 @@
/*
* hashencoder.h
*
* Created on: 12 mai 2009
* Author: coissac
*/
#ifndef HASHENCODER_H_
#define HASHENCODER_H_
static int8_t encoder[] = {0, // A
-1, // b
1, // C
-1,-1,-1, // d, e, f
2, // G
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s
3,3, // T,U
-1,-1,-1,-1,-1}; // v,w,x,y,z
#endif /* HASHENCODER_H_ */

View File

@ -10,15 +10,7 @@
static int cmpword(const void *x,const void *y);
static int8_t encoder[] = {0, // A
-1, // b
1, // C
-1,-1,-1, // d, e, f
2, // G
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s
3,3, // T,U
-1,-1,-1,-1,-1}; // v,w,x,y,z
#include "hashencoder.h"
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
{
@ -31,7 +23,15 @@ uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
return wordcount;
}
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size)
pword_t ecoHashSequence(pword_t dest,
uint32_t wordsize,
uint32_t circular,
uint32_t doublestrand,
ecoseq_t *seq,
uint32_t *size,
int32_t *neededWords,
uint32_t neededWordCount,
int32_t quorum)
{
uint32_t i=0;
uint32_t j;
@ -40,6 +40,7 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
int32_t error=0;
word_t word=0;
word_t antiword=0;
word_t goodword;
uint32_t lmax=0;
(*size)=0;
@ -57,7 +58,9 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
for (i=0, base = seq->SQ; i < wordsize && i < lmax; i++,base++)
{
error<<= 1;
error&=ERRORMASK(wordsize);
code = encoder[(*base) - 'A'];
if (code <0)
@ -68,10 +71,22 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
word=RAPPENDBASE(word,wordsize,code);
if (doublestrand)
antiword=LAPPENDBASE(antiword,wordsize,code);
if (neededWordCount && i>=(FWORDSIZE-1))
{
goodword = (doublestrand) ? MINI(FILTERWORD(word),CFILTERWORD(antiword,wordsize)):FILTERWORD(word);
if (neededWords[(uint32_t)goodword]<quorum)
error|= (1 << (FWORDSIZE-1));
}
}
if (!error && i==wordsize)
{
dest[*size]=(doublestrand) ? MINI(word,antiword):word;
@ -85,9 +100,11 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
/* roll over the sequence for circular ones */
if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
error<<= 1;
error&=ERRORMASK(wordsize);
code = encoder[(*base) - 'A'];
if (code <0)
@ -100,6 +117,17 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
if (doublestrand)
antiword=LAPPENDBASE(antiword,wordsize,code);
if (neededWordCount)
{
goodword = (doublestrand) ? MINI(FILTERWORD(word),CFILTERWORD(antiword,wordsize)):FILTERWORD(word);
if (neededWords[(uint32_t)goodword]<quorum)
error|= (1 << (FWORDSIZE-1));
// else
// DEBUG_LOG("%s goodword = %p %d/%d (pos:%d error:%d)",seq->AC,goodword,neededWords[(uint32_t)goodword],quorum,i,error);
}
if (!error)
{
dest[*size]=(doublestrand) ? MINI(word,antiword):word;
@ -107,7 +135,7 @@ pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint
}
}
// DEBUG_LOG("%s goodword = %d",seq->AC,*size);
return dest;
}
@ -116,12 +144,16 @@ uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
{
uint32_t i,j;
word_t current;
// bool_t here=FALSE;
sortword(table,size);
current = 0;
current=SETMULTIWORD(current); /* build impossible word for the first loop cycle */
// if (strcmp(ecoUnhashWord(table[size-1],18),"GTTTGTTCAACGATTAAA")==0)
// here=TRUE;
for (i=0,j=0; j < size;j++)
{
if (WORD(table[j])!=current)
@ -134,6 +166,9 @@ uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
table[i]=SETMULTIWORD(table[i]);
}
// if (strcmp(ecoUnhashWord(WORD(table[i-1]),18),"TACGACCTCGATGTTGGA")==0)
// DEBUG_LOG("winner %d",i)
return i;
}

View File

@ -41,7 +41,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
(void)mergeInit(&merged,data,s1,s2);
(void)newQueue(&queue,MINI(s1,s2));
while (merged.read1 < s1 && merged.read2 < merged.size)
while (merged.read1 < s1 || merged.read2 < merged.size)
{
if (! queue.empty)
{
@ -56,7 +57,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
source=S1;
}
if (WORD(currentword) > WORD(merged.words[merged.read2]))
if (merged.read2 < merged.size &&
WORD(currentword) > WORD(merged.words[merged.read2]))
{
currentword = merged.words[merged.read2];
currentcount = merged.count[merged.read2];
@ -114,6 +116,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
if (merged.read2 < merged.size)
{
//DEBUG_LOG("end1 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
for (;merged.read2 < merged.size;merged.read2++)
{
merged.words[merged.write]=merged.words[merged.read2];
@ -122,7 +126,10 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
merged.write++;
}
else while (! queue.empty)
}
else {
//DEBUG_LOG("end2 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
while (! queue.empty)
{
// DEBUG_LOG("write : %s count : %d write : %d size : %d pop : %d push : %d empty : %d",ecoUnhashWord(queue.words[queue.pop],18),queue.count[queue.pop],merged.write,queue.size,queue.pop,queue.push,queue.empty)
merged.words[merged.write]=queue.words[queue.pop];
@ -131,6 +138,7 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
if (remainingSeq + merged.count[merged.write] >= seqQuorum)
merged.write++;
}
}
data->size = merged.write;

View File

@ -48,7 +48,7 @@ double timeval_subtract (struct timeval *x, struct timeval *y)
return (double)result.tv_sec + (double)result.tv_usec/1e6;
}
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq)
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount)
{
uint32_t i;
uint32_t buffsize;
@ -65,7 +65,7 @@ pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
if (seq)
{
table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize);
table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize,neededWords,neededWordCount,seqQuorum);
table->size = ecoCompactHashSequence(table->words,buffsize);
table->inseqcount=1;
@ -79,7 +79,7 @@ pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
return table;
}
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq)
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount)
{
uint32_t buffersize;
pword_t newtable;
@ -96,7 +96,7 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize);
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize,neededWords,neededWordCount,seqQuorum);
// DEBUG_LOG("new seq wordCount : %d",newsize);
newsize = ecoCompactHashSequence(newtable,newsize);
@ -137,6 +137,18 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
pwordcount_t strictprimers=NULL;
uint64_t totallength=0;
uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);
int32_t *neededWords;
uint32_t neededWordCount;
fprintf(stderr,"Filtering... ");
if (options->filtering)
neededWords = filteringSeq(database,seqdbsize,exampleCount,options,&neededWordCount,(int32_t)sequenceQuorum);
else
{
neededWordCount=0;
neededWords=NULL;
}
if (options->statistics)
{
@ -152,7 +164,8 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
strictprimers = initCountTable(NULL,options->primer_length,
options->circular,
options->doublestrand,
NULL);
0,
NULL,NULL,0);
getrusage(RUSAGE_SELF,&start);
@ -167,7 +180,8 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
strictprimers = initCountTable(strictprimers,options->primer_length,
options->circular,
options->doublestrand,
database[i]);
sequenceQuorum,
database[i],neededWords,neededWordCount);
first=FALSE;
}
else
@ -180,7 +194,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
options->doublestrand,
exampleCount,
sequenceQuorum,
database[i]);
database[i],neededWords,neededWordCount);
};
totallength+=database[i]->SQ_length;
getrusage(RUSAGE_SELF,&usage);
@ -215,6 +229,9 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
sizeof(word_t)*strictprimers->size,
"Cannot reallocate strict primer table");
if (neededWords)
ECOFREE(neededWords,"Clean needed word table");
return strictprimers;
}

View File

@ -47,7 +47,6 @@ int32_t counttaxon(int32_t taxid)
tsearch((void*)((size_t)taxid),&taxontree,cmptaxon);
taxoncount++;
}
return taxoncount;
}
@ -60,6 +59,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
ecotx_t *tmptaxon;
counttaxon(-1);
options->intaxa = 0;
for (i=0;i<seqdbsize;i++)
{
@ -85,6 +85,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
}
counttaxon(-1);
options->outtaxa = 0;
for (i=0;i<seqdbsize;i++)
{