This commit is contained in:
2007-06-01 15:08:15 +00:00
parent 736a5b384f
commit f64bb16909

185
src/ecofind.c Normal file
View File

@ -0,0 +1,185 @@
#include "libecoPCR/ecoPCR.h"
#include <regex.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <stdio.h>
#define VERSION "0.1"
/* ----------------------------------------------- */
/* structures */
/* ----------------------------------------------- */
/* One search hit: the fields ecofind reports for a taxon whose name matched. */
typedef struct {
char *sci_name; // scientific name; main() stores the taxonomy's own name pointer here (no copy is made)
int32_t tax_id; // taxonomy id, copied from the taxon's taxid field
int32_t tax_rank; // taxonomic rank, copied from the taxon's rank field (printed as a number)
} tax_info ;
/* ----------------------------------------------- */
/* functions */
/* ----------------------------------------------- */
/**
 * Display the result on stdout.
 *
 * Prints a header line followed by one line per match, as three columns
 * separated by " \t|\t ": taxonomy id, taxonomy rank, scientific name.
 * The header now names the columns in the order the rows actually print
 * them (it used to announce rank before id while the rows printed id first).
 *
 * taxonlist : array of pointers to the matches to print
 * j         : points to the number of valid entries in taxonlist
 *
 * Nothing is printed when either pointer is NULL.
 **/
static void printresult(tax_info **taxonlist,int *j){
    int i;

    if (taxonlist == NULL || j == NULL)
        return;

    printf("taxonomy id \t|\t taxonomy rank \t|\t scientific name\n\n");
    for (i = 0; i < *j; i++) {
        printf("%d \t|\t %d \t|\t %s\n",
               taxonlist[i]->tax_id,
               taxonlist[i]->tax_rank,
               taxonlist[i]->sci_name);
    }
}
/**
 * Print the short usage reminder on stderr and, when asked to, exit.
 *
 * stat : exit status. When non-zero the process terminates with that
 *        status; when zero the function simply returns to its caller
 *        after printing.
 **/
static void ExitUsage(int stat)
{
    fputs("usage: ecofind [-d datafile] [-t taxon] [-h]\n", stderr);
    fputs("type \"ecofind -h\" for help\n", stderr);

    if (stat)
        exit(stat);
}
/**
 * Print the full help text (banner with VERSION, synopsis and the list of
 * options) on stdout. Takes no argument and returns nothing; the caller
 * decides whether to exit afterwards.
 **/
static void PrintHelp(void)
{
    static const char rule[] = "------------------------------------------\n";

    fputs(rule, stdout);
    fprintf(stdout, " ecofind Version %s\n", VERSION);
    fputs(rule, stdout);

    fputs("synopsis : searching for scientific name, taxonomic\n"
          " rank and taxonomy id for given regular\n"
          " expression patterns\n"
          "usage: ecofind [options]\n",
          stdout);
    fputs(rule, stdout);

    fputs("options:\n"
          "-d : [D]atafile containing the taxonomy\n"
          " to match the expected format, the database\n"
          " has to be formated first by the ecoPCRFormat.py\n"
          " program located in the tools directory.\n"
          " Write the datafile radical without any extension.\n\n"
          "-h : [H]elp - print <this> help\n\n"
          "-t : [T]axonomic pattern bearing regular expressions\n\n",
          stdout);
    fputs(rule, stdout);
}
/* ----------------------------------------------- */
#define PATTERN_NUMBER 10 /* maximum number of -t patterns accepted per run */

/**
 * ecofind entry point.
 *
 * Options:
 *   -d prefix   taxonomy database to load with read_taxonomy() (required)
 *   -t pattern  extended regular expression matched against taxon names;
 *               may be repeated up to PATTERN_NUMBER times (at least one
 *               is required)
 *   -h          print the help text and exit with status 0
 *
 * For every pattern, each taxon name of the loaded taxonomy is tested and
 * the matching taxa are handed to printresult().
 *
 * Returns 0 on success. Usage errors go through ExitUsage() with a non-zero
 * status; a taxonomy that cannot be loaded or a failed allocation returns 1.
 **/
int main(int argc, char **argv)
{
    int            carg;                    /* option letter returned by getopt */
    int            numpattern = 0;          /* number of patterns stored */
    int            nummatch;                /* matches found for the current pattern */
    int            errflag = 0;
    int            k;
    int32_t        j;
    int32_t        count;                   /* number of taxa in the taxonomy */
    size_t         capacity;
    char          *prefix = NULL;
    char          *taxon = NULL;
    const char    *pattern[PATTERN_NUMBER]; /* point into argv, nothing is copied */
    tax_info      *records;                 /* storage for the match records */
    tax_info     **taxonlist;               /* pointer view handed to printresult */
    ecotaxonomy_t *taxonomy;                /* stores all the taxonomy */
    regex_t        re_preg;
    int            re_error;
    char           re_message[256];

    while ((carg = getopt(argc, argv, "hd:t:")) != -1) {
        switch (carg) {
        case 'd': /* path to the database */
            prefix = optarg;
            break;

        case 't': /* pattern list */
            /* Keep the argv pointer instead of copying into a fixed-size
             * buffer: no truncation and no unterminated string. */
            if (numpattern < PATTERN_NUMBER) {
                pattern[numpattern++] = optarg;
            } else {
                fprintf(stderr, "ecofind: too many patterns (at most %d)\n",
                        PATTERN_NUMBER);
                errflag++;
            }
            break;

        case 'h': /* display help */
            PrintHelp();
            return 0;

        case '?': /* bad option */
        default:
            errflag++;
            break;
        }
    }

    /**
     * check data integrity
     * and exit if needed
     **/
    if (prefix == NULL || numpattern == 0)
        errflag++;

    if (errflag) {
        ExitUsage(errflag);
        return errflag; /* only reached if ExitUsage comes back */
    }

    /**
     * load taxonomy using libecoPCR functions
     **/
    printf("-- opening %s database --\n", prefix);
    taxonomy = read_taxonomy(prefix);
    /* NOTE(review): read_taxonomy may abort by itself on failure; the check
     * is kept in case it reports failure with a NULL result - confirm. */
    if (taxonomy == NULL || taxonomy->taxons == NULL) {
        fprintf(stderr, "ecofind: cannot read taxonomy from %s\n", prefix);
        return 1;
    }
    count = taxonomy->taxons->count;
    printf("-- %d taxons --\n", count);

    /* A pattern can match at most every taxon, so sizing the result storage
     * to the taxon count removes any possibility of overflow. The storage is
     * allocated once and reused for every pattern; it lives until exit. */
    capacity  = count > 0 ? (size_t)count : 1;
    records   = ECOMALLOC(sizeof *records * capacity,
                          "error in taxon records allocation");
    taxonlist = ECOMALLOC(sizeof *taxonlist * capacity,
                          "error in **taxonlist allocation");
    if (records == NULL || taxonlist == NULL) {
        fprintf(stderr, "ecofind: out of memory\n");
        return 1;
    }

    /**
     * parse taxonomy
     **/
    for (k = 0; k < numpattern; k++) {
        nummatch = 0;
        printf("\n-- searching for %s pattern --\n", pattern[k]);

        /* Compile once per pattern rather than once per taxon. */
        re_error = regcomp(&re_preg, pattern[k], REG_NOSUB | REG_EXTENDED);
        if (re_error != 0) {
            regerror(re_error, &re_preg, re_message, sizeof re_message);
            fprintf(stderr, "ecofind: invalid pattern \"%s\": %s\n",
                    pattern[k], re_message);
        } else {
            for (j = 0; j < count; j++) {
                taxon = taxonomy->taxons->taxon[j].name;
                if (taxon == NULL || regexec(&re_preg, taxon, 0, NULL, 0) != 0)
                    continue;

                records[nummatch].sci_name = taxon;
                records[nummatch].tax_id   = taxonomy->taxons->taxon[j].taxid;
                records[nummatch].tax_rank = taxonomy->taxons->taxon[j].rank;
                taxonlist[nummatch] = &records[nummatch];
                nummatch++;
            }
            regfree(&re_preg);
        }

        printf("-- %d records found --\n\n", nummatch);
        if (nummatch > 0)
            printresult(taxonlist, &nummatch);
    }

    return 0;
}