#include "libecoPCR/ecoPCR.h"

/*
 * NOTE(review): the names of the five system headers were missing from the
 * reviewed text; the list below is reconstructed from the library functions
 * this file uses -- confirm against the original file.
 */
#include <regex.h>
#include <getopt.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

#define VERSION "0.1"

/* ----------------------------------------------- */
/* structures                                      */
/* ----------------------------------------------- */

/**
 * One search hit, as handed to printresult().
 **/
typedef struct {
    char    *sci_name;  /* scientific name; points into the loaded taxonomy, not owned */
    int32_t  tax_id;    /* taxonomy id (original comment: for use as an ecoPCR prefilter) */
    int32_t  tax_rank;  /* taxonomic rank, copied from the taxon's `rank` field */
} tax_info;

/* ----------------------------------------------- */
/* functions                                       */
/* ----------------------------------------------- */

/**
 * display the result on stdout
 *
 * @param taxonlist  array of pointers to the hits to print
 * @param j          pointer to the number of valid entries in taxonlist
 *
 * Prints one header line followed by one line per hit, in the column
 * order: taxonomy id, taxonomy rank, scientific name.
 **/
static void printresult(tax_info **taxonlist, int *j)
{
    int i;

    /* Header order matches the order of the values printed below. */
    printf("taxonomy id \t|\t taxonomy rank \t|\t scientific name\n\n");

    for (i = 0; i < *j; i++) {
        printf("%d \t|\t %d \t|\t %s\n",
               (int)taxonlist[i]->tax_id,
               (int)taxonlist[i]->tax_rank,
               taxonlist[i]->sci_name);
    }
}

/**
 * print usage on stderr and exit
 *
 * @param stat  exit status; when it is 0 the function returns to the
 *              caller instead of exiting
 **/
static void ExitUsage(int stat)
{
    fputs("usage: ecofind [-d datafile] [-t taxon] [-h]\n", stderr);
    fputs("type \"ecofind -h\" for help\n", stderr);

    if (stat)
        exit(stat);
}

/**
 * print the help text on stdout
 **/
static void PrintHelp(void)
{
    fputs("------------------------------------------\n", stdout);
    printf(" ecofind Version %s\n", VERSION);
    fputs("------------------------------------------\n", stdout);
    fputs("synopsis : searching for scientific name, taxonomic\n", stdout);
    fputs(" rank and taxonomy id for given regular\n", stdout);
    fputs(" expression patterns\n", stdout);
    fputs("usage: ecofind [options]\n", stdout);
    fputs("------------------------------------------\n", stdout);
    fputs("options:\n", stdout);
    fputs("-d : [D]atafile containing the taxonomy\n", stdout);
    fputs(" to match the expected format, the database\n", stdout);
    fputs(" has to be formated first by the ecoPCRFormat.py\n", stdout);
    fputs(" program located in the tools directory.\n", stdout);
    fputs(" Write the datafile radical without any extension.\n\n", stdout);
    fputs("-h : [H]elp - print help\n\n", stdout);
    fputs("-t : [T]axonomic pattern bearing regular expressions\n\n", stdout);
    fputs("------------------------------------------\n", stdout);
}

/* ----------------------------------------------- */

#define PATTERN_NUMBER 10   /* maximum number of -t patterns accepted      */
#define RESULT_LENGTH  100  /* maximum number of hits kept for one pattern */

/**
 * ecofind entry point.
 *
 * Options:
 *   -d prefix   taxonomy database prefix, passed to read_taxonomy()
 *   -t pattern  POSIX extended regular expression matched against taxon
 *               names; may be given up to PATTERN_NUMBER times
 *   -h          print the help text and exit with status 0
 *
 * For every pattern, the taxon names are scanned in order and the hits
 * (at most RESULT_LENGTH of them) are printed with printresult().
 *
 * @return 0 on success, 1 if the taxonomy could not be read or a pattern
 *         failed to compile; usage errors exit through ExitUsage().
 **/
int main(int argc, char **argv)
{
    int            carg;
    int            errflag    = 0;
    int            status     = 0;
    int32_t        numpattern = 0;              /* number of -t patterns given */
    int32_t        k;
    char          *prefix     = NULL;
    const char    *pattern[PATTERN_NUMBER];     /* patterns; point into argv   */
    tax_info       results[RESULT_LENGTH];      /* storage for the hits        */
    tax_info      *taxonlist[RESULT_LENGTH];    /* view handed to printresult  */
    ecotaxonomy_t *taxonomy;                    /* the whole taxonomy          */

    while ((carg = getopt(argc, argv, "hd:t:")) != -1) {
        switch (carg) {
        case 'd':                               /* path to the database */
            prefix = optarg;
            break;

        case 't':                               /* pattern list */
            /*
             * Keep a pointer to the argument instead of copying it into a
             * fixed-size buffer: argv outlives this function's use of it,
             * and a truncated regular expression would silently match
             * something other than what the user asked for.
             */
            if (numpattern >= PATTERN_NUMBER) {
                fprintf(stderr, "too many patterns (at most %d allowed)\n",
                        PATTERN_NUMBER);
                errflag++;
            } else {
                pattern[numpattern++] = optarg;
            }
            break;

        case 'h':                               /* display help */
            PrintHelp();
            exit(0);

        case '?':                               /* bad option */
        default:
            errflag++;
            break;
        }
    }

    /**
     * check data integrity
     * and exit if needed
     **/
    if (prefix == NULL || numpattern == 0)
        errflag++;

    if (errflag)
        ExitUsage(errflag);                     /* does not return: errflag != 0 */

    /**
     * load taxonomy using libecoPCR functions
     **/
    printf("-- opening %s database --\n", prefix);

    taxonomy = read_taxonomy(prefix);
    if (taxonomy == NULL || taxonomy->taxons == NULL) {
        fprintf(stderr, "cannot read taxonomy from %s\n", prefix);
        return 1;
    }

    printf("-- %d taxons --\n", taxonomy->taxons->count);

    /**
     * parse taxonomy
     *
     * NOTE(review): part of this loop's header was missing from the
     * reviewed text (between "for (k=0;k" and "taxonomy->"); it has been
     * reconstructed as one pass over all taxons per pattern. If the
     * original printed a per-pattern banner there, it must be restored.
     **/
    for (k = 0; k < numpattern; k++) {
        regex_t re_preg;
        int     re_error;
        int     nummatch  = 0;                  /* hits for this pattern only */
        int     truncated = 0;
        int32_t j;

        /* The pattern does not change while scanning: compile it once. */
        re_error = regcomp(&re_preg, pattern[k], REG_NOSUB | REG_EXTENDED);
        if (re_error != 0) {
            char msg[256];

            regerror(re_error, &re_preg, msg, sizeof msg);
            fprintf(stderr, "invalid pattern '%s': %s\n", pattern[k], msg);
            status = 1;
            continue;
        }

        for (j = 0; j < taxonomy->taxons->count; j++) {
            char *taxon = taxonomy->taxons->taxon[j].name;

            if (regexec(&re_preg, taxon, 0, NULL, 0) != 0)
                continue;

            /* Never write past the end of the result storage. */
            if (nummatch >= RESULT_LENGTH) {
                truncated = 1;
                break;
            }

            results[nummatch].sci_name = taxon;
            results[nummatch].tax_id   = taxonomy->taxons->taxon[j].taxid;
            results[nummatch].tax_rank = taxonomy->taxons->taxon[j].rank;
            taxonlist[nummatch]        = &results[nummatch];
            nummatch++;
        }

        regfree(&re_preg);

        if (truncated)
            fprintf(stderr,
                    "warning: more than %d matches for '%s', output truncated\n",
                    RESULT_LENGTH, pattern[k]);

        printf("-- %d records found --\n\n", nummatch);
        if (nummatch > 0)
            printresult(taxonlist, &nummatch);
    }

    return status;
}