2007-06-01 15:08:15 +00:00
|
|
|
#include "libecoPCR/ecoPCR.h"
|
|
|
|
#include <regex.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <getopt.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#define VERSION "0.1"
|
|
|
|
|
|
|
|
/**
|
|
|
|
* display the result on stdout
|
|
|
|
**/
|
2007-06-04 15:34:00 +00:00
|
|
|
|
|
|
|
/**
 * Print one taxon as a single row on stdout.
 *
 * The row contains, in order: the taxon id, the label of the taxon's rank
 * (looked up in the taxonomy's rank label table using taxon->rank as index)
 * and the taxon name, laid out in '|' separated columns.
 *
 * @param taxon     taxon to display
 * @param taxonomy  taxonomy whose rank table provides the rank label
 **/
static void printresult(ecotx_t *taxon,ecotaxonomy_t *taxonomy){
    /* resolve the numeric rank of the taxon into its textual label */
    const char *rank_label = taxonomy->ranks->label[taxon->rank];

    printf("%10d \t| %15s \t|\t %s\n", taxon->taxid, rank_label, taxon->name);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* print the usage on stderr and exit when the given status is non-zero
|
|
|
|
**/
|
|
|
|
#define PP fprintf(stderr,
|
|
|
|
|
|
|
|
/**
 * Print the short usage reminder on stderr.
 *
 * @param stat  exit status. When non-zero the process is terminated with
 *              exit(stat); when zero the function simply returns to the
 *              caller after printing.
 **/
static void ExitUsage(int stat)
{
    /* written with a prototype parameter list: old-style (K&R) definitions
     * are obsolescent and no longer accepted by recent C standards */
    fputs("usage: ecofind [-d datafile] [-h] <taxon name pattern> ... \n", stderr);
    fputs("type \"ecofind -h\" for help\n", stderr);

    if (stat != 0) {
        exit(stat);
    }
}
|
|
|
|
|
|
|
|
#undef PP
|
|
|
|
|
|
|
|
/**
|
|
|
|
* printout help
|
|
|
|
**/
|
|
|
|
#define PP fprintf(stdout,
|
|
|
|
|
|
|
|
static void PrintHelp()
|
|
|
|
{
|
|
|
|
PP "------------------------------------------\n");
|
|
|
|
PP " ecofind Version %s\n", VERSION);
|
|
|
|
PP "------------------------------------------\n");
|
|
|
|
PP "synopsis : searching for scientific name, taxonomic\n");
|
|
|
|
PP " rank and taxonomy id for given regular\n");
|
|
|
|
PP " expression patterns\n");
|
|
|
|
PP "usage: ecofind [options]\n");
|
|
|
|
PP "------------------------------------------\n");
|
|
|
|
PP "options:\n");
|
|
|
|
PP "-d : [D]atafile containing the taxonomy\n");
|
|
|
|
PP " to match the expected format, the database\n");
|
|
|
|
PP " has to be formated first by the ecoPCRFormat.py\n");
|
|
|
|
PP " program located in the tools directory.\n");
|
|
|
|
PP " Write the datafile radical without any extension.\n\n");
|
|
|
|
PP "-h : [H]elp - print <this> help\n\n");
|
2007-06-04 15:34:00 +00:00
|
|
|
PP "<taxon> name pattern bearing regular expressions\n\n");
|
2007-06-01 15:08:15 +00:00
|
|
|
PP "------------------------------------------\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------- */
|
|
|
|
|
|
|
|
#define PATTERN_NUMBER 10
|
|
|
|
#define PATTERN_LENGHT 40
|
|
|
|
#define RESULT_LENGTH 100
|
|
|
|
|
|
|
|
int main(int argc, char **argv)
|
|
|
|
{
|
|
|
|
int32_t carg = 0;
|
|
|
|
int32_t nummatch = 0;
|
|
|
|
int32_t k,j = 0;
|
|
|
|
int32_t errflag = 0;
|
2007-06-04 15:34:00 +00:00
|
|
|
int32_t tax_count = 0;
|
2007-06-01 15:08:15 +00:00
|
|
|
char *prefix = NULL;
|
2007-06-04 15:34:00 +00:00
|
|
|
ecotx_t *taxon = NULL;
|
|
|
|
ecotaxonomy_t *taxonomy;
|
2007-06-01 15:08:15 +00:00
|
|
|
|
|
|
|
int re_error;
|
|
|
|
int re_match;
|
|
|
|
regex_t re_preg;
|
|
|
|
|
2007-06-04 15:34:00 +00:00
|
|
|
while ((carg = getopt(argc, argv, "hd:")) != -1) {
|
|
|
|
switch (carg) {
|
|
|
|
case 'd': /* path to the database */
|
|
|
|
prefix = optarg;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'h': /* display help */
|
|
|
|
PrintHelp();
|
|
|
|
exit(0);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '?': /* bad option */
|
|
|
|
errflag++;
|
|
|
|
}
|
2007-06-01 15:08:15 +00:00
|
|
|
}
|
|
|
|
|
2007-06-04 15:34:00 +00:00
|
|
|
/**
|
|
|
|
* check the path to the database is given as last argument
|
|
|
|
**/
|
|
|
|
if ((argc - optind) < 1)
|
|
|
|
errflag++;
|
2007-06-01 15:08:15 +00:00
|
|
|
|
|
|
|
if (errflag)
|
|
|
|
ExitUsage(errflag);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* load taxonomy using libecoPCR functions
|
|
|
|
**/
|
2007-06-04 15:34:00 +00:00
|
|
|
printf("# \n# opening %s database\n",prefix);
|
2007-06-01 15:08:15 +00:00
|
|
|
|
|
|
|
taxonomy = read_taxonomy(prefix);
|
2007-06-04 15:34:00 +00:00
|
|
|
tax_count = taxonomy->taxons->count;
|
2007-06-01 15:08:15 +00:00
|
|
|
|
2007-06-04 15:34:00 +00:00
|
|
|
printf("# %d taxons\n", tax_count);
|
2007-06-01 15:08:15 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* parse taxonomy
|
|
|
|
**/
|
2007-06-04 15:34:00 +00:00
|
|
|
for (k=optind;k<argc;k++)
|
2007-06-01 15:08:15 +00:00
|
|
|
{
|
2007-06-04 15:34:00 +00:00
|
|
|
printf("#\n# searching for '%s' pattern\n",argv[k]);
|
|
|
|
|
|
|
|
re_error = regcomp (&re_preg, argv[k], REG_NOSUB | REG_EXTENDED | REG_ICASE);
|
|
|
|
if (re_error)
|
|
|
|
{
|
|
|
|
fprintf(stderr,"# misformed pattern '%s'\n",argv[k]);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2007-06-01 15:08:15 +00:00
|
|
|
nummatch=0;
|
|
|
|
|
2007-06-04 15:34:00 +00:00
|
|
|
printf("# taxonomy id \t|\t taxonomy rank \t|\t scientific name\n#\n");
|
|
|
|
|
|
|
|
for (j=0,taxon=taxonomy->taxons->taxon;
|
|
|
|
j < tax_count;
|
|
|
|
taxon++,j++)
|
|
|
|
{
|
|
|
|
re_match = regexec (&re_preg, taxon->name, 0, NULL, 0);
|
|
|
|
if (re_match == 0)
|
|
|
|
{
|
|
|
|
printresult(taxon,taxonomy);
|
|
|
|
nummatch++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("# %d records found \n",nummatch);
|
|
|
|
regfree(&re_preg);
|
2007-06-01 15:08:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|