#include "libecoPCR/ecoPCR.h" #include #include #include #include #define VERSION "0.1" void freememory(char **tab, int32_t num){ int32_t i; for (i=0;itaxid); seq->AC = strdup(line[0]); //seq->DE = strdup(line[]); seq->SQ = strdup(line[18]); seq->SQ_length = strlen(line[18]); apatseq=ecoseq2apatseq(seq,apatseq); for (i=0; i < numpattern ;i++){ current_patt = buildPattern(pattern[i],error_max); if(ManberAll(apatseq,current_patt,0,0,apatseq->seqlen)) return 1; } return 0; } void printline(char **line, int32_t i){ int32_t k=0; for (k=0; k < i; k++) printf("%s |",line[k]); printf("\n\n"); } /* ----------------------------------------------- */ /* printout help */ /* ----------------------------------------------- */ #define PP fprintf(stdout, static void PrintHelp() { PP "\n------------------------------------------\n"); PP " ecogrep Version %s\n", VERSION); PP "------------------------------------------\n"); PP " synopsis : filtering ecoPCR result based on\n"); PP " taxonomic id filter and regular expression pattern\n"); PP " usage: ecogrep [options] database\n"); PP "------------------------------------------\n"); PP " options:\n"); PP " -d : [D]atabase containing taxonomic information\n\n"); PP " -f : [F]ile name : ecoPCR ouput file\n\n"); PP " -h : [H]elp - print help\n\n"); PP " -i : [I]gnore taxonomic id\n\n"); PP " -r : [R]estrict taxomic id\n\n"); PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n"); PP "------------------------------------------\n"); PP " Pattern : oligonucleotide pattern\n");; PP "------------------------------------------\n\n"); PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki"); PP "------------------------------------------\n\n"); } #undef PP /* ----------------------------------------------- */ /* printout usage and exit */ /* ----------------------------------------------- */ #define PP fprintf(stderr, static void ExitUsage(stat) int stat; { PP "usage: ecogrep [-d database] [-f filename] [-i taxid] [-r taxid] [-v] [-h] \n"); PP "type \"ecogrep -h\" for help\n"); if (stat) exit(stat); } #undef PP /* ----------------------------------------------- */ /* MAIN */ /* ----------------------------------------------- */ int main(int argc, char **argv){ int32_t carg = 0; int32_t r = 0; // number of restricted taxid int32_t g = 0; // number of ignored taxid int32_t v = 0; int32_t p = 0; // number of pattern int32_t i = 0; int32_t errflag = 0; int32_t error_max = 0; int32_t matchingresult = 0; ecotaxonomy_t *taxonomy; char *filename = NULL; // stores the datafile name char *database = NULL; // stores the database path (for taxonomy) int32_t *restricted_taxid = NULL; // stores the restricted taxid int32_t *ignored_taxid = NULL; // stores the ignored taxid char **pattern = NULL; // stores the regex pattern char *line[19] = {0}; // stores the line FILE *file = NULL; char *stream = ECOMALLOC(sizeof(char *)*10000,"error stream buffer allocation"); int current_taxid; char *buffer; while ((carg = getopt(argc, argv, "f:d:i:r:e:vh")) != -1) { switch (carg) { case 'f': filename = ECOMALLOC(strlen(optarg)+1, "Error on filename allocation"); strcpy(filename,optarg); break; case 'd': database = ECOMALLOC(strlen(optarg)+1, "Error on datafile allocation"); strcpy(database,optarg); break; case 'i': ignored_taxid = ECOREALLOC( ignored_taxid, sizeof(int32_t)*(g+1), "Error on ignored_taxid reallocation"); sscanf(optarg,"%d",&ignored_taxid[g]); g++; break; case 'r': restricted_taxid = ECOREALLOC( restricted_taxid, sizeof(int32_t)*(r+1), "Error on restricted_taxid reallocation"); sscanf(optarg,"%d",&restricted_taxid[r]); r++; break; case 'v': v = 1; break; case 'h': PrintHelp(); exit(0); break; case 'e': sscanf(optarg,"%d",&error_max); break; case '?': errflag++; } } /** * Get the leftover command line arguments back * and check the pattern is not more than 32 character long */ pattern = ECOMALLOC(sizeof *pattern * (argc - optind), "Error in pattern allocation"); for (p=0 ; argc > optind ; optind++, p++){ if (strlen(argv[optind]) <= 32) pattern[p] = strdup(argv[optind]); else { printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\ \n# Please correct this one : %s\n",argv[optind]); exit(0); } } /** * try to get the database name from environment variable * if no database name specified in the -d option if (!database && getenv("ECOPCRDB")) database = getenv("ECOPCRDB"); else errflag++; */ /** * check everything required is provided */ if (!filename || !database || (!p && restricted_taxid == NULL && ignored_taxid == NULL)) errflag++; if (errflag) ExitUsage(errflag); /** * Open the ecoPCR file */ if (filename) { if( (file = fopen(filename, "r")) == NULL) ECOERROR(ECO_IO_ERROR,"Cannot open the file !"); } /** * Get the taxonomy back */ taxonomy = read_taxonomy(database); /** * Parse the stream */ while( fgets(stream, 10000, file) ){ if (stream[0]!= '#') { for( i=0, buffer = strtok(stream,"|"); buffer != NULL; i++, buffer = strtok(NULL,"|")) { line[i] = strdup(buffer); } sscanf(line[4],"%d",¤t_taxid); if(!v) // normal mode { if ( (r > 0) && !(eco_is_taxid_included( taxonomy, restricted_taxid, r, current_taxid)) ) continue; if ( (g > 0) && (eco_is_taxid_included( taxonomy, ignored_taxid, g, current_taxid)) ) continue; } else // v mode, invert ignore and restrict taxid { if ( (r > 0) && (eco_is_taxid_included( taxonomy, restricted_taxid, r, current_taxid)) ) continue; if ( (g > 0) && !(eco_is_taxid_included( taxonomy, ignored_taxid, g, current_taxid)) ) continue; } if ( p == 0 || (ispatternmatching(line,pattern,p,error_max))) { printline(line,i); matchingresult++; } } } printf("# %d matching result\n",matchingresult); freememory(line,i); freememory(pattern,p); ECOFREE(pattern,"Error in free pattern"); ECOFREE(stream,"Error in free stream"); return 0; }