Files
ecopcr/src/ecofind.c

160 lines
3.8 KiB
C

#include "libecoPCR/ecoPCR.h"
#include <regex.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <stdio.h>
#define VERSION "0.1"
/**
 * Write one taxon to stdout as a single row:
 *   <taxid> | <rank label> | <name>
 *
 * taxon    : record to print; its taxid, rank and name fields are read.
 * taxonomy : taxonomy whose ranks->label table is indexed with taxon->rank
 *            to obtain the rank label.
 **/
static void printresult(ecotx_t *taxon,ecotaxonomy_t *taxonomy)
{
    printf("%10d \t| %15s \t|\t %s\n",
           taxon->taxid,
           taxonomy->ranks->label[taxon->rank],
           taxon->name);
}
/**
 * Print a short usage reminder on stderr.
 *
 * stat : when non-zero, the process is terminated with `stat` as its exit
 *        status after the message is printed; when zero, the function
 *        returns to the caller.
 **/
static void ExitUsage(int stat)
{
    /* The messages contain no conversion specifiers, so fputs emits exactly
     * what the former fprintf calls did. */
    fputs("usage: ecofind [-d datafile] [-h] <taxon name pattern> ... \n", stderr);
    fputs("type \"ecofind -h\" for help\n", stderr);

    if (stat)
        exit(stat);
}
/**
 * Print the full help text (banner, synopsis, usage, options) on stdout.
 * Takes no argument and returns nothing; the caller decides whether to exit.
 **/
static void PrintHelp(void)
{
    static const char rule[] = "------------------------------------------\n";

    fputs(rule, stdout);
    printf(" ecofind Version %s\n", VERSION);
    fputs(rule, stdout);
    fputs("synopsis : searching for scientific name, taxonomic\n", stdout);
    fputs(" rank and taxonomy id for given regular\n", stdout);
    fputs(" expression patterns\n", stdout);
    /* At least one pattern is mandatory on the command line, so the usage
     * line shows it, in agreement with the short usage message. */
    fputs("usage: ecofind [options] <taxon name pattern> ...\n", stdout);
    fputs(rule, stdout);
    fputs("options:\n", stdout);
    fputs("-d : [D]atafile containing the taxonomy\n", stdout);
    fputs(" to match the expected format, the database\n", stdout);
    fputs(" has to be formated first by the ecoPCRFormat.py\n", stdout);
    fputs(" program located in the tools directory.\n", stdout);
    fputs(" Write the datafile radical without any extension.\n\n", stdout);
    fputs("-h : [H]elp - print <this> help\n\n", stdout);
    fputs("<taxon> name pattern bearing regular expressions\n\n", stdout);
    fputs(rule, stdout);
}
/* ----------------------------------------------- */
/* Sizing constants. None of them is referenced in the visible part of this
 * file. NOTE(review): confirm they are still needed before removing them. */
#define PATTERN_NUMBER 10
#define PATTERN_LENGHT 40 /* sic: spelling of the identifier kept as-is */
#define RESULT_LENGTH 100
/**
 * ecofind entry point.
 *
 * Command line: ecofind -d <database prefix> [-h] <pattern> ...
 *
 * Loads the taxonomy designated by -d, then, for every pattern given on the
 * command line, compiles it as a case-insensitive extended regular
 * expression and prints every taxon whose name matches it.
 *
 * Returns 0 on success. Exits with a non-zero status on a usage error
 * (bad option, missing -d, no pattern), when the taxonomy cannot be loaded,
 * or when a pattern does not compile.
 */
int main(int argc, char **argv)
{
    int32_t        carg      = 0;
    int32_t        nummatch  = 0;
    int32_t        k         = 0;
    int32_t        j         = 0;
    int32_t        errflag   = 0;
    int32_t        tax_count = 0;
    char          *prefix    = NULL;
    ecotx_t       *taxon     = NULL;
    ecotaxonomy_t *taxonomy  = NULL;
    int            re_error;
    regex_t        re_preg;
    char           re_msg[256];

    while ((carg = getopt(argc, argv, "hd:")) != -1) {
        switch (carg) {
        case 'd': /* path to the database */
            prefix = optarg;
            break;

        case 'h': /* display help */
            PrintHelp();
            exit(0);

        case '?': /* bad option */
        default:
            errflag++;
            break;
        }
    }

    /**
     * the database prefix is mandatory: without it a NULL pointer would be
     * handed to printf("%s") and to read_taxonomy
     **/
    if (prefix == NULL)
        errflag++;

    /**
     * at least one pattern must follow the options
     **/
    if ((argc - optind) < 1)
        errflag++;

    if (errflag)
        ExitUsage(errflag);

    /**
     * load taxonomy using libecoPCR functions
     **/
    printf("# \n# opening %s database\n", prefix);

    taxonomy = read_taxonomy(prefix);
    /* NOTE(review): read_taxonomy may already abort by itself on failure;
     * this guard only protects the dereferences below - confirm. */
    if (taxonomy == NULL) {
        fprintf(stderr, "# cannot open %s database\n", prefix);
        exit(1);
    }

    tax_count = taxonomy->taxons->count;
    printf("# %d taxons\n", tax_count);

    /**
     * parse taxonomy: one full scan of the taxon table per pattern
     **/
    for (k = optind; k < argc; k++) {
        printf("#\n# searching for '%s' pattern\n", argv[k]);

        re_error = regcomp(&re_preg, argv[k],
                           REG_NOSUB | REG_EXTENDED | REG_ICASE);
        if (re_error) {
            /* report the reason given by the regex library as well */
            regerror(re_error, &re_preg, re_msg, sizeof re_msg);
            fprintf(stderr, "# misformed pattern '%s'\n", argv[k]);
            fprintf(stderr, "# %s\n", re_msg);
            exit(1);
        }

        nummatch = 0;
        printf("# taxonomy id \t|\t taxonomy rank \t|\t scientific name\n#\n");

        for (j = 0, taxon = taxonomy->taxons->taxon;
             j < tax_count;
             taxon++, j++) {
            /* regexec returns 0 when the name matches the pattern */
            if (regexec(&re_preg, taxon->name, 0, NULL, 0) == 0) {
                printresult(taxon, taxonomy);
                nummatch++;
            }
        }

        printf("# %d records found \n", nummatch);
        regfree(&re_preg);
    }

    return 0;
}