git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@22 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
185
src/ecofind.c
Normal file
185
src/ecofind.c
Normal file
@ -0,0 +1,185 @@
|
||||
#include "libecoPCR/ecoPCR.h"
|
||||
#include <regex.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <getopt.h>
|
||||
#include <stdio.h>
|
||||
#define VERSION "0.1"
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* structures */
|
||||
/* ----------------------------------------------- */
|
||||
/*
 * One search hit: the three pieces of information ecofind reports
 * for a taxon whose name matched a pattern.
 */
typedef struct {
    char    *sci_name;   /* scientific name of the taxon */
    int32_t  tax_id;     /* taxonomy id (original note: used by the ecoPCR prefilter) */
    int32_t  tax_rank;   /* taxonomic rank */
} tax_info;
|
||||
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* functions */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
/**
|
||||
* display the result in sdtout
|
||||
**/
|
||||
static void printresult(tax_info **taxonlist,int *j){
|
||||
int i;
|
||||
printf("taxonomy rank \t|\t taxonomy id \t|\t scientific name\n\n");
|
||||
for(i=0;i<*j;i++){
|
||||
printf("%d \t|\t %d \t|\t %s\n",
|
||||
taxonlist[i]->tax_id,
|
||||
taxonlist[i]->tax_rank,
|
||||
taxonlist[i]->sci_name);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Print the short usage reminder on stderr and, for a non-zero
 * status, terminate the program.
 *
 * @param stat  exit status; when non-zero the process ends through
 *              exit(stat), when zero the function simply returns
 *              after printing.
 **/
static void ExitUsage(int stat)
{
    fputs("usage: ecofind [-d datafile] [-t taxon] [-h]\n", stderr);
    fputs("type \"ecofind -h\" for help\n", stderr);

    if (stat)
        exit(stat);
}
|
||||
|
||||
/**
|
||||
* printout help
|
||||
**/
|
||||
#define PP fprintf(stdout,
|
||||
|
||||
static void PrintHelp()
|
||||
{
|
||||
PP "------------------------------------------\n");
|
||||
PP " ecofind Version %s\n", VERSION);
|
||||
PP "------------------------------------------\n");
|
||||
PP "synopsis : searching for scientific name, taxonomic\n");
|
||||
PP " rank and taxonomy id for given regular\n");
|
||||
PP " expression patterns\n");
|
||||
PP "usage: ecofind [options]\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "options:\n");
|
||||
PP "-d : [D]atafile containing the taxonomy\n");
|
||||
PP " to match the expected format, the database\n");
|
||||
PP " has to be formated first by the ecoPCRFormat.py\n");
|
||||
PP " program located in the tools directory.\n");
|
||||
PP " Write the datafile radical without any extension.\n\n");
|
||||
PP "-h : [H]elp - print <this> help\n\n");
|
||||
PP "-t : [T]axonomic pattern bearing regular expressions\n\n");
|
||||
PP "------------------------------------------\n");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
#define PATTERN_NUMBER 10
|
||||
#define PATTERN_LENGHT 40
|
||||
#define RESULT_LENGTH 100
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int32_t carg = 0;
|
||||
int32_t numpattern = 0; /* stores number of pattern */
|
||||
int32_t nummatch = 0;
|
||||
int32_t k,j = 0;
|
||||
int32_t errflag = 0;
|
||||
char *prefix = NULL;
|
||||
char *taxon = NULL;
|
||||
char pattern[PATTERN_NUMBER][PATTERN_LENGHT]; /* stores the pattern */
|
||||
tax_info **taxonlist = ECOMALLOC(sizeof(tax_info) * RESULT_LENGTH, /* stores the result */
|
||||
"error in **taxonlist allocation");
|
||||
ecotaxonomy_t *taxonomy; /* stores all the taxonomy */
|
||||
|
||||
int re_error;
|
||||
int re_match;
|
||||
regex_t re_preg;
|
||||
|
||||
while ((carg = getopt(argc, argv, "hd:t:")) != -1) {
|
||||
switch (carg) {
|
||||
case 'd': /* path to the database */
|
||||
prefix = optarg;
|
||||
break;
|
||||
|
||||
case 't': /* pattern list */
|
||||
strncpy(pattern[numpattern],optarg,PATTERN_LENGHT);
|
||||
numpattern++;
|
||||
break;
|
||||
|
||||
case 'h': /* display help */
|
||||
PrintHelp();
|
||||
exit(0);
|
||||
break;
|
||||
|
||||
case '?': /* bad option */
|
||||
errflag++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* check data integrity
|
||||
* and exit if needed
|
||||
**/
|
||||
if (!prefix || !pattern)
|
||||
errflag++;
|
||||
|
||||
if (taxonlist == NULL)
|
||||
errflag++;
|
||||
|
||||
if (errflag)
|
||||
ExitUsage(errflag);
|
||||
|
||||
/**
|
||||
* load taxonomy using libecoPCR functions
|
||||
**/
|
||||
printf("-- opening %s database --\n",prefix);
|
||||
|
||||
taxonomy = read_taxonomy(prefix);
|
||||
|
||||
printf("-- %d taxons --\n", taxonomy->taxons->count);
|
||||
|
||||
/**
|
||||
* parse taxonomy
|
||||
**/
|
||||
for (k=0;k<numpattern;k++)
|
||||
{
|
||||
j=0;
|
||||
nummatch=0;
|
||||
printf("\n-- searching for %s pattern --\n",pattern[k]);
|
||||
do {
|
||||
taxon = taxonomy->taxons->taxon[j].name;
|
||||
re_error = regcomp (&re_preg, pattern[k], REG_NOSUB | REG_EXTENDED);
|
||||
if (re_error == 0)
|
||||
{
|
||||
re_match = regexec (&re_preg, taxon, 0, NULL, 0);
|
||||
regfree(&re_preg);
|
||||
if (re_match == 0)
|
||||
{
|
||||
taxonlist[nummatch] = ECOMALLOC(sizeof(taxonlist),
|
||||
"error in taxon j allocation");
|
||||
taxonlist[nummatch]->sci_name = taxon;
|
||||
taxonlist[nummatch]->tax_id = taxonomy->taxons->taxon[j].taxid;
|
||||
taxonlist[nummatch]->tax_rank = taxonomy->taxons->taxon[j].rank;
|
||||
nummatch++;
|
||||
}
|
||||
}
|
||||
j++;
|
||||
} while(j < taxonomy->taxons->count);
|
||||
|
||||
printf("-- %d records found --\n\n",nummatch);
|
||||
|
||||
if (nummatch > 0)
|
||||
printresult(taxonlist, &nummatch);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user