diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..3b04cfb --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.2 diff --git a/src/ecoPrimer b/src/ecoPrimer new file mode 100755 index 0000000..2845505 Binary files /dev/null and b/src/ecoPrimer differ diff --git a/src/ecoprimer.c b/src/ecoprimer.c index 8283965..a10e25a 100644 --- a/src/ecoprimer.c +++ b/src/ecoprimer.c @@ -97,8 +97,9 @@ static void ExitUsage(int stat) void initoptions(poptions_t options) { + options->statistics=FALSE; options->lmin=0; //< Amplifia minimal length - options->lmax=0; //< Amplifia maximal length + options->lmax=1000; //< Amplifia maximal length options->error_max=3; //**< maximum error count in fuzzy search options->primer_length=18; //**< minimal length of the primers options->restricted_taxid=NULL; //**< limit amplification below these taxid @@ -441,9 +442,15 @@ int main(int argc, char **argv) initoptions(&options); - while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) { + while ((carg = getopt(argc, argv, "hvcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) { switch (carg) { + /* ---------------------------- */ + case 'v': /* set in single strand mode */ + /* ---------------------------- */ + options.statistics=TRUE; + break; + /* -------------------- */ case 'd': /* database name */ /* -------------------- */ diff --git a/src/libecoPCR/ecoError.P b/src/libecoPCR/ecoError.P new file mode 100644 index 0000000..7c7ae71 --- /dev/null +++ b/src/libecoPCR/ecoError.P @@ -0,0 +1,15 @@ +ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + 
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoIOUtils.P b/src/libecoPCR/ecoIOUtils.P new file mode 100644 index 0000000..fc34a70 --- /dev/null +++ b/src/libecoPCR/ecoIOUtils.P @@ -0,0 +1,15 @@ +ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoMalloc.P b/src/libecoPCR/ecoMalloc.P new file mode 100644 index 0000000..ea3767b --- /dev/null +++ b/src/libecoPCR/ecoMalloc.P @@ -0,0 +1,15 @@ +ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h 
/usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecodna.P b/src/libecoPCR/ecodna.P new file mode 100644 index 0000000..b9a71b9 --- /dev/null +++ b/src/libecoPCR/ecodna.P @@ -0,0 +1,5 @@ +ecodna.o ecodna.P : ecodna.c /usr/include/string.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h ecoPCR.h \ + /usr/include/stdio.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h diff --git a/src/libecoPCR/ecofilter.P b/src/libecoPCR/ecofilter.P new file mode 100644 index 0000000..d46d3e0 --- /dev/null +++ b/src/libecoPCR/ecofilter.P @@ -0,0 +1,5 @@ +ecofilter.o ecofilter.P : ecofilter.c ecoPCR.h /usr/include/stdio.h \ + /usr/include/_types.h /usr/include/sys/_types.h \ + /usr/include/sys/cdefs.h /usr/include/machine/_types.h \ + /usr/include/i386/_types.h /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h diff --git a/src/libecoPCR/econame.P b/src/libecoPCR/econame.P new file mode 100644 index 0000000..4c9946c --- /dev/null +++ b/src/libecoPCR/econame.P @@ -0,0 +1,15 @@ +econame.o econame.P : econame.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + 
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \ + /usr/include/i386/signal.h /usr/include/i386/_structs.h \ + /usr/include/sys/_structs.h /usr/include/machine/_structs.h \ + /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecorank.P b/src/libecoPCR/ecorank.P new file mode 100644 index 0000000..75e09b9 --- /dev/null +++ b/src/libecoPCR/ecorank.P @@ -0,0 +1,15 @@ +ecorank.o ecorank.P : ecorank.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + /usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \ + /usr/include/i386/signal.h /usr/include/i386/_structs.h \ + /usr/include/sys/_structs.h /usr/include/machine/_structs.h \ + /usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \ + /usr/include/machine/endian.h /usr/include/i386/endian.h \ + /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h diff --git a/src/libecoPCR/ecoseq.P b/src/libecoPCR/ecoseq.P new file mode 100644 index 0000000..6222690 --- /dev/null +++ b/src/libecoPCR/ecoseq.P @@ -0,0 +1,19 @@ +ecoseq.o ecoseq.P : ecoseq.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + 
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \ + /usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \ + /usr/include/machine/signal.h /usr/include/i386/signal.h \ + /usr/include/i386/_structs.h /usr/include/sys/_structs.h \ + /usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \ + /usr/include/sys/resource.h /usr/include/machine/endian.h \ + /usr/include/i386/endian.h /usr/include/sys/_endian.h \ + /usr/include/libkern/_OSByteOrder.h \ + /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ + /usr/include/machine/types.h /usr/include/i386/types.h \ + /usr/include/string.h /usr/include/zlib.h /usr/include/zconf.h \ + /usr/include/sys/types.h /usr/include/unistd.h \ + /usr/include/sys/unistd.h /usr/include/sys/select.h \ + /usr/include/sys/_select.h diff --git a/src/libecoPCR/ecoseq.c b/src/libecoPCR/ecoseq.c index 141db5d..10bb68a 100644 --- a/src/libecoPCR/ecoseq.c +++ b/src/libecoPCR/ecoseq.c @@ -144,6 +144,7 @@ ecoseq_t *readnext_ecoseq(FILE *f) if (comp_status != Z_OK) ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data"); +// fprintf(stderr,"seq name : %30s seq size : %d\n",seq->DE,seq->SQ_length); return seq; } diff --git a/src/libecoPCR/ecotax.P b/src/libecoPCR/ecotax.P new file mode 100644 index 0000000..489fc97 --- /dev/null +++ b/src/libecoPCR/ecotax.P @@ -0,0 +1,15 @@ +ecotax.o ecotax.P : ecotax.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \ + /usr/include/sys/_types.h /usr/include/sys/cdefs.h \ + /usr/include/machine/_types.h /usr/include/i386/_types.h \ + /usr/include/inttypes.h \ + /usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \ + /usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \ + /usr/include/sys/wait.h /usr/include/sys/signal.h \ + 
/**
 * Compute the elapsed time between two time stamps, in seconds.
 *
 * @param x  later time stamp (minuend); only read.
 * @param y  earlier time stamp (subtrahend); only read.
 * @return   (x - y) expressed in seconds as a double.
 *
 * The carry between the seconds and microseconds fields is resolved on a
 * private copy of *y, so the caller's reference time stamp is left intact
 * and can safely be reused for subsequent measurements.
 */
static double timeval_subtract(struct timeval *x, struct timeval *y)
{
    /* Local copy of the subtrahend: all normalisation happens here. */
    struct timeval sub = *y;

    /* Borrow whole seconds so that x->tv_usec - sub.tv_usec is not negative. */
    if (x->tv_usec < sub.tv_usec) {
        long nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;
        sub.tv_usec -= 1000000 * nsec;
        sub.tv_sec += nsec;
    }

    /* Give back whole seconds if the microsecond gap exceeds one second. */
    if (x->tv_usec - sub.tv_usec > 1000000) {
        long nsec = (x->tv_usec - sub.tv_usec) / 1000000;
        sub.tv_usec += 1000000 * nsec;
        sub.tv_sec -= nsec;
    }

    /* After normalisation the microsecond difference is non-negative. */
    return (double)(x->tv_sec - sub.tv_sec)
         + (double)(x->tv_usec - sub.tv_usec) / 1e6;
}
(options->statistics) + { + asprintf(&logfilename,"ecoprimer_%d.log",getpid()); + logfile = fopen(logfilename,"a"); + seconde = timeval_subtract(&(usage.ru_utime),&(start.ru_utime)) + + timeval_subtract(&(usage.ru_stime),&(start.ru_stime)); + fprintf(logfile,"%d\t%llu\t%lu\t%8.3f\t%8.3e\n",i, + totallength,strictprimers->size*sizeof(int64_t), + seconde,seconde/(double)totallength); + fclose(logfile); + } } else strictprimers->outseqcount++;