This commit is contained in:
2007-06-04 15:34:00 +00:00
parent 5094a9a9ce
commit 7d9ab96dc1

View File

@ -6,34 +6,19 @@
#include <stdio.h> #include <stdio.h>
#define VERSION "0.1" #define VERSION "0.1"
/* ----------------------------------------------- */
/* structures */
/* ----------------------------------------------- */
typedef struct {
char *sci_name; // scientific name
int32_t tax_id; // tax id use ecoPCR prefilter
int32_t tax_rank; // taxonomic rank
} tax_info ;
/* ----------------------------------------------- */
/* functions */
/* ----------------------------------------------- */
/** /**
* display the result in stdout * display the result in stdout
**/ **/
static void printresult(tax_info **taxonlist,int *j){
int i; static void printresult(ecotx_t *taxon,ecotaxonomy_t *taxonomy){
printf("taxonomy rank \t|\t taxonomy id \t|\t scientific name\n\n"); char* rankname;
for(i=0;i<*j;i++){
printf("%d \t|\t %d \t|\t %s\n", rankname= taxonomy->ranks->label[taxon->rank];
taxonlist[i]->tax_id,
taxonlist[i]->tax_rank, printf("%10d \t| %15s \t|\t %s\n",
taxonlist[i]->sci_name); taxon->taxid,
} rankname,
taxon->name);
} }
/** /**
@ -44,7 +29,7 @@ static void printresult(tax_info **taxonlist,int *j){
static void ExitUsage(stat) static void ExitUsage(stat)
int stat; int stat;
{ {
PP "usage: ecofind [-d datafile] [-t taxon] [-h]\n"); PP "usage: ecofind [-d datafile] [-h] <taxon name pattern> ... \n");
PP "type \"ecofind -h\" for help\n"); PP "type \"ecofind -h\" for help\n");
if (stat) if (stat)
exit(stat); exit(stat);
@ -74,7 +59,7 @@ static void PrintHelp()
PP " program located in the tools directory.\n"); PP " program located in the tools directory.\n");
PP " Write the datafile radical without any extension.\n\n"); PP " Write the datafile radical without any extension.\n\n");
PP "-h : [H]elp - print <this> help\n\n"); PP "-h : [H]elp - print <this> help\n\n");
PP "-t : [T]axonomic pattern bearing regular expressions\n\n"); PP "<taxon> name pattern bearing regular expressions\n\n");
PP "------------------------------------------\n"); PP "------------------------------------------\n");
} }
@ -87,51 +72,39 @@ static void PrintHelp()
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
int32_t carg = 0; int32_t carg = 0;
int32_t numpattern = 0; /* stores number of pattern */
int32_t nummatch = 0; int32_t nummatch = 0;
int32_t k,j = 0; int32_t k,j = 0;
int32_t errflag = 0; int32_t errflag = 0;
int32_t tax_count = 0;
char *prefix = NULL; char *prefix = NULL;
char *taxon = NULL; ecotx_t *taxon = NULL;
char pattern[PATTERN_NUMBER][PATTERN_LENGHT]; /* stores the pattern */ ecotaxonomy_t *taxonomy;
tax_info **taxonlist = ECOMALLOC(sizeof(tax_info) * RESULT_LENGTH, /* stores the result */
"error in **taxonlist allocation");
ecotaxonomy_t *taxonomy; /* stores all the taxonomy */
int re_error; int re_error;
int re_match; int re_match;
regex_t re_preg; regex_t re_preg;
while ((carg = getopt(argc, argv, "hd:t:")) != -1) { while ((carg = getopt(argc, argv, "hd:")) != -1) {
switch (carg) { switch (carg) {
case 'd': /* path to the database */ case 'd': /* path to the database */
prefix = optarg; prefix = optarg;
break; break;
case 't': /* pattern list */ case 'h': /* display help */
strncpy(pattern[numpattern],optarg,PATTERN_LENGHT); PrintHelp();
numpattern++; exit(0);
break; break;
case 'h': /* display help */ case '?': /* bad option */
PrintHelp(); errflag++;
exit(0); }
break;
case '?': /* bad option */
errflag++;
}
} }
/** /**
* check data integrity * check that at least one taxon name pattern follows the options
* and exit if needed
**/ **/
if (!prefix || !pattern) if ((argc - optind) < 1)
errflag++; errflag++;
if (taxonlist == NULL)
errflag++;
if (errflag) if (errflag)
ExitUsage(errflag); ExitUsage(errflag);
@ -139,44 +112,45 @@ int main(int argc, char **argv)
/** /**
* load taxonomy using libecoPCR functions * load taxonomy using libecoPCR functions
**/ **/
printf("-- opening %s database --\n",prefix); printf("# \n# opening %s database\n",prefix);
taxonomy = read_taxonomy(prefix); taxonomy = read_taxonomy(prefix);
tax_count = taxonomy->taxons->count;
printf("-- %d taxons --\n", taxonomy->taxons->count); printf("# %d taxons\n", tax_count);
/** /**
* parse taxonomy * parse taxonomy
**/ **/
for (k=0;k<numpattern;k++) for (k=optind;k<argc;k++)
{ {
j=0; printf("#\n# searching for '%s' pattern\n",argv[k]);
re_error = regcomp (&re_preg, argv[k], REG_NOSUB | REG_EXTENDED | REG_ICASE);
if (re_error)
{
fprintf(stderr,"# misformed pattern '%s'\n",argv[k]);
exit(1);
}
nummatch=0; nummatch=0;
printf("\n-- searching for %s pattern --\n",pattern[k]);
do {
taxon = taxonomy->taxons->taxon[j].name;
re_error = regcomp (&re_preg, pattern[k], REG_NOSUB | REG_EXTENDED);
if (re_error == 0)
{
re_match = regexec (&re_preg, taxon, 0, NULL, 0);
regfree(&re_preg);
if (re_match == 0)
{
taxonlist[nummatch] = ECOMALLOC(sizeof(taxonlist),
"error in taxon j allocation");
taxonlist[nummatch]->sci_name = taxon;
taxonlist[nummatch]->tax_id = taxonomy->taxons->taxon[j].taxid;
taxonlist[nummatch]->tax_rank = taxonomy->taxons->taxon[j].rank;
nummatch++;
}
}
j++;
} while(j < taxonomy->taxons->count);
printf("-- %d records found --\n\n",nummatch); printf("# taxonomy id \t|\t taxonomy rank \t|\t scientific name\n#\n");
if (nummatch > 0) for (j=0,taxon=taxonomy->taxons->taxon;
printresult(taxonlist, &nummatch); j < tax_count;
taxon++,j++)
{
re_match = regexec (&re_preg, taxon->name, 0, NULL, 0);
if (re_match == 0)
{
printresult(taxon,taxonomy);
nummatch++;
}
}
printf("# %d records found \n",nummatch);
regfree(&re_preg);
} }
return 0; return 0;