From f64bb169092c986b60bb48ed21d1842940ca84c5 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 1 Jun 2007 15:08:15 +0000 Subject: [PATCH] git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@22 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- src/ecofind.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 src/ecofind.c diff --git a/src/ecofind.c b/src/ecofind.c new file mode 100644 index 0000000..ffc443a --- /dev/null +++ b/src/ecofind.c @@ -0,0 +1,185 @@ +#include "libecoPCR/ecoPCR.h" +#include +#include +#include +#include +#include +#define VERSION "0.1" + + +/* ----------------------------------------------- */ +/* structures */ +/* ----------------------------------------------- */ +typedef struct { + char *sci_name; // scientific name + int32_t tax_id; // tax id use ecoPCR prefilter + int32_t tax_rank; // taxonomic rank +} tax_info ; + + + +/* ----------------------------------------------- */ +/* functions */ +/* ----------------------------------------------- */ + +/** + * display the result in sdtout + **/ +static void printresult(tax_info **taxonlist,int *j){ + int i; + printf("taxonomy rank \t|\t taxonomy id \t|\t scientific name\n\n"); + for(i=0;i<*j;i++){ + printf("%d \t|\t %d \t|\t %s\n", + taxonlist[i]->tax_id, + taxonlist[i]->tax_rank, + taxonlist[i]->sci_name); + } +} + +/** + * printout usage and exit + **/ +#define PP fprintf(stderr, + +static void ExitUsage(stat) + int stat; +{ + PP "usage: ecofind [-d datafile] [-t taxon] [-h]\n"); + PP "type \"ecofind -h\" for help\n"); + if (stat) + exit(stat); +} + +#undef PP + +/** + * printout help + **/ +#define PP fprintf(stdout, + +static void PrintHelp() +{ + PP "------------------------------------------\n"); + PP " ecofind Version %s\n", VERSION); + PP "------------------------------------------\n"); + PP "synopsis : searching for scientific name, taxonomic\n"); + PP " rank and taxonomy id for given regular\n"); + PP " expression patterns\n"); + PP "usage: ecofind [options]\n"); + PP "------------------------------------------\n"); + PP "options:\n"); + PP "-d : [D]atafile containing the taxonomy\n"); + PP " to match the expected format, the database\n"); + PP " has to be formated first by the ecoPCRFormat.py\n"); + PP " program located in the tools directory.\n"); + PP " Write the datafile radical without any extension.\n\n"); + PP "-h : [H]elp - print help\n\n"); + PP "-t : [T]axonomic pattern bearing regular expressions\n\n"); + PP "------------------------------------------\n"); +} + +/* ----------------------------------------------- */ + +#define PATTERN_NUMBER 10 +#define PATTERN_LENGHT 40 +#define RESULT_LENGTH 100 + +int main(int argc, char **argv) +{ + int32_t carg = 0; + int32_t numpattern = 0; /* stores number of pattern */ + int32_t nummatch = 0; + int32_t k,j = 0; + int32_t errflag = 0; + char *prefix = NULL; + char *taxon = NULL; + char pattern[PATTERN_NUMBER][PATTERN_LENGHT]; /* stores the pattern */ + tax_info **taxonlist = ECOMALLOC(sizeof(tax_info) * RESULT_LENGTH, /* stores the result */ + "error in **taxonlist allocation"); + ecotaxonomy_t *taxonomy; /* stores all the taxonomy */ + + int re_error; + int re_match; + regex_t re_preg; + + while ((carg = getopt(argc, argv, "hd:t:")) != -1) { + switch (carg) { + case 'd': /* path to the database */ + prefix = optarg; + break; + + case 't': /* pattern list */ + strncpy(pattern[numpattern],optarg,PATTERN_LENGHT); + numpattern++; + break; + + case 'h': /* display help */ + PrintHelp(); + exit(0); + break; + + case '?': /* bad option */ + errflag++; + } + } + + /** + * check data integrity + * and exit if needed + **/ + if (!prefix || !pattern) + errflag++; + + if (taxonlist == NULL) + errflag++; + + if (errflag) + ExitUsage(errflag); + + /** + * load taxonomy using libecoPCR functions + **/ + printf("-- opening %s database --\n",prefix); + + taxonomy = read_taxonomy(prefix); + + printf("-- %d taxons --\n", taxonomy->taxons->count); + + /** + * parse taxonomy + **/ + for (k=0;ktaxons->taxon[j].name; + re_error = regcomp (&re_preg, pattern[k], REG_NOSUB | REG_EXTENDED); + if (re_error == 0) + { + re_match = regexec (&re_preg, taxon, 0, NULL, 0); + regfree(&re_preg); + if (re_match == 0) + { + taxonlist[nummatch] = ECOMALLOC(sizeof(taxonlist), + "error in taxon j allocation"); + taxonlist[nummatch]->sci_name = taxon; + taxonlist[nummatch]->tax_id = taxonomy->taxons->taxon[j].taxid; + taxonlist[nummatch]->tax_rank = taxonomy->taxons->taxon[j].rank; + nummatch++; + } + } + j++; + } while(j < taxonomy->taxons->count); + + printf("-- %d records found --\n\n",nummatch); + + if (nummatch > 0) + printresult(taxonlist, &nummatch); + } + + return 0; +} + +