#include "libecoPCR/ecoPCR.h" #include #include #include #include #include #include #define VERSION "0.1" void freememory(char **tab, int32_t num){ int32_t i; for (i=0;itaxid); seq->AC = strdup(line[0]); seq->SQ = strdup(line[18]); seq->SQ_length = strlen(line[18]); apatseq=ecoseq2apatseq(seq,apatseq); for (i=0; i < numpattern ;i++){ current_patt = buildPattern(pattern[i],error_max); if(ManberAll(apatseq,current_patt,0,0,apatseq->seqlen)) return 1; } return 0; } /** *returns the result on standard output * @param line array containing sequence information * @param i length of line **/ void printline(char **line, int32_t i){ int32_t k=0; for (k=0; k < i; k++) printf("%s |",line[k]); printf("\n\n"); } /* ----------------------------------------------- */ /* printout help */ /* ----------------------------------------------- */ #define PP fprintf(stdout, static void PrintHelp() { PP "\n------------------------------------------\n"); PP " ecogrep Version %s\n", VERSION); PP "------------------------------------------\n"); PP " synopsis : filtering ecoPCR result based on\n"); PP " taxonomic id filter and regular expression pattern\n"); PP " usage: ecogrep [options] database\n"); PP "------------------------------------------\n"); PP " options:\n"); PP " -d : [D]atabase containing taxonomic information\n\n"); PP " -f : [F]ile name : ecoPCR ouput file\n\n"); PP " -h : [H]elp - print help\n\n"); PP " -i : [I]gnore taxonomic id\n\n"); PP " -r : [R]estrict taxomic id\n\n"); PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n"); PP "------------------------------------------\n"); PP " Pattern : oligonucleotide pattern\n"); PP "------------------------------------------\n\n"); PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki\n"); PP "------------------------------------------\n\n"); } #undef PP /* ----------------------------------------------- */ /* printout usage and exit */ /* ----------------------------------------------- */ #define PP fprintf(stderr, static void ExitUsage(stat) int stat; { PP "usage: ecogrep [-d database] [-f filename] [-i taxid] [-r taxid] [-v] [-h] \n"); PP "type \"ecogrep -h\" for help\n"); if (stat) exit(stat); } #undef PP /* ----------------------------------------------- */ /* MAIN */ /* ----------------------------------------------- */ int main(int argc, char **argv){ int32_t carg = 0; int32_t r = 0; // number of restricted taxid int32_t g = 0; // number of ignored taxid int32_t v = 0; // stores if -v mode is active int32_t p = 0; // number of pattern int32_t i = 0; int32_t errflag = 0; int32_t error_max = 0; // stores the error rate allowed by the user int32_t matchingresult = 0; // stores number of matching result ecotaxonomy_t *taxonomy; // stores the taxonomy char *database = NULL; // stores the database path (for taxonomy) char **pattern = NULL; // stores the regex pattern char *line[19] = {0}; // stores the line int32_t *restricted_taxid = NULL; // stores the restricted taxid int32_t *ignored_taxid = NULL; // stores the ignored taxid int32_t current_taxid; FILE *file = NULL; // stores the data stream, stdin by default char *stream = ECOMALLOC(sizeof(char *)*10000,"error stream buffer allocation"); char *buffer; /** * Parse commande line options **/ while ((carg = getopt(argc, argv, "f:d:i:r:e:vh")) != -1) { switch (carg) { case 'f': if ( (file = fopen(optarg, "r")) == NULL) errflag++; break; case 'd': database = ECOMALLOC(strlen(optarg)+1, "Error on datafile allocation"); strcpy(database,optarg); break; case 'i': ignored_taxid = ECOREALLOC( ignored_taxid, sizeof(int32_t)*(g+1), "Error on ignored_taxid reallocation"); sscanf(optarg,"%d",&ignored_taxid[g]); g++; break; case 'r': restricted_taxid = ECOREALLOC( restricted_taxid, sizeof(int32_t)*(r+1), "Error on restricted_taxid reallocation"); sscanf(optarg,"%d",&restricted_taxid[r]); r++; break; case 'v': v = 1; break; case 'h': PrintHelp(); exit(0); break; case 'e': sscanf(optarg,"%d",&error_max); break; case '?': errflag++; } } /** * Get the left-over command line arguments back * and check the pattern is not more than 32 character long */ pattern = ECOMALLOC(sizeof *pattern * (argc - optind), "Error in pattern allocation"); for (p=0 ; argc > optind ; optind++, p++){ if (strlen(argv[optind]) <= 32) pattern[p] = strdup(argv[optind]); else { printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\ \n# Please check it out : %s\n",argv[optind]); exit(0); } } /** * check standard input if no file name given in -f option **/ if (file == NULL) { if (isatty(fileno(stdin))) errflag++; else file = stdin; } /** * try to get the database name from environment variable * if no database name specified in the -d option **/ if (database == NULL) { database = getenv("ECOPCRDB"); if (database == NULL) errflag++; } /** * check at leat one processing is asked * either patterns or taxid filters */ if ( !p && restricted_taxid == NULL && ignored_taxid == NULL ) errflag++; if (errflag) ExitUsage(errflag); /** * Get the taxonomy back */ taxonomy = read_taxonomy(database); /** * Parse the stream */ while( fgets(stream, 10000, file) != NULL ){ if (stream[0]!= '#') { for( i=0, buffer = strtok(stream,"|"); buffer != NULL; i++, buffer = strtok(NULL,"|")) { printf("¤¤ %s\n",buffer); line[i] = strdup(buffer); } sscanf(line[4],"%d",¤t_taxid); if(!v) // normal mode { if ( (r > 0) && !(eco_is_taxid_included( taxonomy, restricted_taxid, r, current_taxid)) ) continue; if ( (g > 0) && (eco_is_taxid_included( taxonomy, ignored_taxid, g, current_taxid)) ) continue; } else // v mode, invert ignore and restrict options { if ( (r > 0) && (eco_is_taxid_included( taxonomy, restricted_taxid, r, current_taxid)) ) continue; if ( (g > 0) && !(eco_is_taxid_included( taxonomy, ignored_taxid, g, current_taxid)) ) continue; } if ( p == 0 || (ispatternmatching(line,pattern,p,error_max))) { printline(line,i); matchingresult++; } } } printf("# %d matching result\n",matchingresult); /** * clean, close and free before leaving **/ if ( file != stdin ) fclose(file); freememory(line,i); freememory(pattern,p); ECOFREE(pattern,"Error in free pattern"); ECOFREE(stream,"Error in free stream"); return 0; }