diff --git a/src/ecogrep.c b/src/ecogrep.c new file mode 100644 index 0000000..6d0704c --- /dev/null +++ b/src/ecogrep.c @@ -0,0 +1,289 @@ +#include "libecoPCR/ecoPCR.h" +#include +#include +#include +#include + +#define VERSION "0.1" + +void freememory(char **tab, int32_t num){ + int32_t i; + for (i=0;itaxid); + seq->AC = strdup(line[0]); + //seq->DE = strdup(line[]); + seq->SQ = strdup(line[18]); + seq->SQ_length = strlen(line[18]); + + apatseq=ecoseq2apatseq(seq,apatseq); + + for (i=0; i < numpattern ;i++){ + current_patt = buildPattern(pattern[i],error_max); + if(ManberAll(apatseq,current_patt,0,0,apatseq->seqlen)) + return 1; + } + return 0; +} + +void printline(char **line, int32_t i){ + int32_t k=0; + for (k=0; k < i; k++) + printf("%s |",line[k]); + printf("\n\n"); +} + +/* ----------------------------------------------- */ +/* printout help */ +/* ----------------------------------------------- */ +#define PP fprintf(stdout, + +static void PrintHelp() +{ + PP "\n------------------------------------------\n"); + PP " ecogrep Version %s\n", VERSION); + PP "------------------------------------------\n"); + PP " synopsis : filtering ecoPCR result based on\n"); + PP " taxonomic id filter and regular expression pattern\n"); + PP " usage: ecogrep [options] database\n"); + PP "------------------------------------------\n"); + PP " options:\n"); + PP " -d : [D]atabase containing taxonomic information\n\n"); + PP " -f : [F]ile name : ecoPCR ouput file\n\n"); + PP " -h : [H]elp - print help\n\n"); + PP " -i : [I]gnore taxonomic id\n\n"); + PP " -r : [R]estrict taxomic id\n\n"); + PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n"); + PP "------------------------------------------\n"); + PP " Pattern : oligonucleotide pattern\n");; + PP "------------------------------------------\n\n"); + PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki"); + PP "------------------------------------------\n\n"); +} + +#undef PP + +/* ----------------------------------------------- */ +/* printout usage and exit */ +/* ----------------------------------------------- */ + +#define PP fprintf(stderr, + +static void ExitUsage(stat) + int stat; +{ + PP "usage: ecogrep [-d database] [-f filename] [-i taxid] [-r taxid] [-v] [-h] \n"); + PP "type \"ecogrep -h\" for help\n"); + + if (stat) + exit(stat); +} + +#undef PP + +/* ----------------------------------------------- */ +/* MAIN */ +/* ----------------------------------------------- */ + +int main(int argc, char **argv){ + int32_t carg = 0; + int32_t r = 0; // number of restricted taxid + int32_t g = 0; // number of ignored taxid + int32_t v = 0; + int32_t p = 0; // number of pattern + int32_t i = 0; + int32_t errflag = 0; + int32_t error_max = 0; + int32_t matchingresult = 0; + + ecotaxonomy_t *taxonomy; + + char *filename = NULL; // stores the datafile name + char *database = NULL; // stores the database path (for taxonomy) + int32_t *restricted_taxid = NULL; // stores the restricted taxid + int32_t *ignored_taxid = NULL; // stores the ignored taxid + char **pattern = NULL; // stores the regex pattern + char *line[19] = {0}; // stores the line + + + FILE *file = NULL; + char *stream = ECOMALLOC(sizeof(char *)*10000,"error stream buffer allocation"); + int current_taxid; + char *buffer; + + while ((carg = getopt(argc, argv, "f:d:i:r:e:vh")) != -1) { + + switch (carg) { + case 'f': + filename = ECOMALLOC(strlen(optarg)+1, + "Error on filename allocation"); + strcpy(filename,optarg); + break; + + case 'd': + database = ECOMALLOC(strlen(optarg)+1, + "Error on datafile allocation"); + strcpy(database,optarg); + break; + + case 'i': + ignored_taxid = ECOREALLOC( ignored_taxid, + sizeof(int32_t)*(g+1), + "Error on ignored_taxid reallocation"); + sscanf(optarg,"%d",&ignored_taxid[g]); + g++; + break; + + case 'r': + restricted_taxid = ECOREALLOC( restricted_taxid, + sizeof(int32_t)*(r+1), + "Error on restricted_taxid reallocation"); + sscanf(optarg,"%d",&restricted_taxid[r]); + r++; + break; + + case 'v': + v = 1; + break; + + case 'h': + PrintHelp(); + exit(0); + break; + + case 'e': + sscanf(optarg,"%d",&error_max); + break; + + case '?': + errflag++; + } + } + + /** + * Get the leftover command line arguments back + * and check the pattern is not more than 32 character long + */ + pattern = ECOMALLOC(sizeof *pattern * (argc - optind), "Error in pattern allocation"); + for (p=0 ; argc > optind ; optind++, p++){ + if (strlen(argv[optind]) <= 32) + pattern[p] = strdup(argv[optind]); + else + { + printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\ + \n# Please correct this one : %s\n",argv[optind]); + exit(0); + } + } + + /** + * try to get the database name from environment variable + * if no database name specified in the -d option + + if (!database && getenv("ECOPCRDB")) + database = getenv("ECOPCRDB"); + else + errflag++; + */ + + /** + * check everything required is provided + */ + if (!filename || + !database || + (!p && restricted_taxid == NULL && ignored_taxid == NULL)) + errflag++; + + if (errflag) + ExitUsage(errflag); + + /** + * Open the ecoPCR file + */ + if (filename) + { + if( (file = fopen(filename, "r")) == NULL) + ECOERROR(ECO_IO_ERROR,"Cannot open the file !"); + } + + /** + * Get the taxonomy back + */ + taxonomy = read_taxonomy(database); + + /** + * Parse the stream + */ + while( fgets(stream, 10000, file) ){ + + if (stream[0]!= '#') + { + for( i=0, buffer = strtok(stream,"|"); + buffer != NULL; + i++, buffer = strtok(NULL,"|")) + { + line[i] = strdup(buffer); + } + + sscanf(line[4],"%d",¤t_taxid); + + if(!v) // normal mode + { + if ( (r > 0) && !(eco_is_taxid_included( taxonomy, + restricted_taxid, + r, + current_taxid)) + ) + continue; + + if ( (g > 0) && (eco_is_taxid_included( taxonomy, + ignored_taxid, + g, + current_taxid)) + ) + continue; + } + + else // v mode, invert ignore and restrict taxid + { + if ( (r > 0) && (eco_is_taxid_included( taxonomy, + restricted_taxid, + r, + current_taxid)) + ) + continue; + if ( (g > 0) && !(eco_is_taxid_included( taxonomy, + ignored_taxid, + g, + current_taxid)) + ) + continue; + } + + if ( p == 0 || (ispatternmatching(line,pattern,p,error_max))) + { + printline(line,i); + matchingresult++; + } + } + } + + printf("# %d matching result\n",matchingresult); + + freememory(line,i); + freememory(pattern,p); + ECOFREE(pattern,"Error in free pattern"); + ECOFREE(stream,"Error in free stream"); + return 0; +} \ No newline at end of file