diff --git a/src/ecogrep.c b/src/ecogrep.c index 17aa73e..ca2d6f3 100644 --- a/src/ecogrep.c +++ b/src/ecogrep.c @@ -9,7 +9,7 @@ #define VERSION "0.1" -void getLineContent(char *stream, ecoseq_t *seq){ +void getLineContent(char *stream, ecoseq_t *seq, ecoseq_t *oligoseq_1, ecoseq_t *oligoseq_2){ int i; char *buffer; @@ -25,6 +25,14 @@ void getLineContent(char *stream, ecoseq_t *seq){ case 4: sscanf(buffer,"%d",&seq->taxid); break; + case 13: + oligoseq_1->SQ = strdup(buffer); + oligoseq_1->SQ_length = strlen(buffer); + break; + case 15: + oligoseq_2->SQ = strdup(buffer); + oligoseq_2->SQ_length = strlen(buffer); + break; case 18: seq->SQ = strdup(buffer); seq->SQ_length = strlen(buffer); @@ -74,19 +82,22 @@ static void PrintHelp() PP "------------------------------------------\n"); PP " synopsis : filtering ecoPCR result based on\n"); PP " taxonomic id filter and regular expression pattern\n"); - PP " usage: ecogrep [options] database\n"); + PP " usage: ecogrep [options] filename\n"); PP "------------------------------------------\n"); PP " options:\n"); - PP " -d : [D]atabase containing taxonomic information\n\n"); - PP " -p : [P]attern oligonucleotide pattern\n\n"); - PP " -h : [H]elp - print help\n\n"); - PP " -i : [I]gnore taxonomic id\n\n"); - PP " -r : [R]estrict taxomic id\n\n"); - PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n"); - PP "------------------------------------------\n"); - PP "ecoPCR ouput file name\n"); + PP " -1 : [FIRST] : compare the given pattern with direct strand oligonucleotide"); + PP " -2 : [SECOND] : compare the given pattern with reverse strand oligonucleotide"); + PP " -d : [D]atabase containing taxonomic information\n\n"); + PP " -e : [E]rrors : max error allowed in pattern match (option-1, -2 and -p) (0 by default)\n\n"); + PP " -p : [P]attern : oligonucleotide pattern\n\n"); + PP " -h : [H]elp : print help\n\n"); + PP " -i : [I]gnore subtree under given taxonomic id\n\n"); + PP " -r : [R]estrict search to subtree under given taxomic id\n\n"); + PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n\n"); + PP " argument:\n"); + PP " ecoPCR ouput file name\n"); PP "------------------------------------------\n\n"); - PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki\n"); + PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n"); PP "------------------------------------------\n\n"); } @@ -127,12 +138,20 @@ int main(int argc, char **argv){ int32_t matchingresult = 0; // stores number of matching result ecotaxonomy_t *taxonomy; // stores the taxonomy + ecoseq_t *seq = NULL; // stores sequence info + ecoseq_t *oligoseq_1 = NULL; // stores the oligo_1 info + ecoseq_t *oligoseq_2 = NULL; // stores the oligo_2 info - - char *p = NULL; // number of pattern char *database = NULL; // stores the database path (for taxonomy) - PatternPtr pattern = NULL; // stores the build pattern + + char *p = NULL; // pattern for sequence + PatternPtr pattern = NULL; // stores the build pattern for sequence + char *o1 = NULL; // pattern for direct strand oligo + PatternPtr oligo_1 = NULL; // stores the build pattern for direct strand oligo + char *o2 = NULL; // pattern for reverse strand oligo + PatternPtr oligo_2 = NULL; // stores the build pattern for reverse strand oligo + int32_t *restricted_taxid = NULL; // stores the restricted taxid int32_t *ignored_taxid = NULL; // stores the ignored taxid @@ -143,15 +162,32 @@ int main(int argc, char **argv){ int is_ignored = 0; int is_included = 0; int is_matching = 0; + int match_o1 = 0; + int match_o2 = 0; int good = 0; + seq = new_ecoseq(); + oligoseq_1 = new_ecoseq(); + oligoseq_2 = new_ecoseq(); /** * Parse commande line options **/ - while ((carg = getopt(argc, argv, "p:d:i:r:e:vh")) != -1) { + while ((carg = getopt(argc, argv, "1:2:p:d:i:r:e:vh")) != -1) { switch (carg) { + case '1': + o1 = ECOMALLOC(strlen(optarg)+1, + "Error on o1 allocation"); + strcpy(o1,optarg); + break; + + case '2': + o2 = ECOMALLOC(strlen(optarg)+1, + "Error on o2 allocation"); + strcpy(o2,optarg); + break; + case 'd': database = ECOMALLOC(strlen(optarg)+1, "Error on datafile allocation"); @@ -199,17 +235,48 @@ int main(int argc, char **argv){ } /** - * Check pattern length and build it in PatternPtr format + * Check sequence pattern length and build it in PatternPtr format **/ - if(p && strlen(p) > 32) + if(p) { - printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\ - \n# Please check it out : %s\n",p); - exit(EXIT_FAILURE); - } - else if (p) - if ( (pattern = buildPattern(p,error_max)) == NULL) + if (strlen(p) > 32){ + printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\ + \n# Please check it out : %s\n",p); exit(EXIT_FAILURE); + } + else if ( (pattern = buildPattern(p,error_max)) == NULL) + exit(EXIT_FAILURE); + } + + + + /** + * Check o1 pattern length and build it in PatternPtr format + **/ + if(o1) + { + if (strlen(o1) > 32){ + printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\ + \n# Please check it out : %s\n",o1); + exit(EXIT_FAILURE); + } + else if ( (oligo_1 = buildPattern(o1,error_max)) == NULL) + exit(EXIT_FAILURE); + } + + /** + * Check o2 pattern length and build it in PatternPtr format + **/ + if(o2) + { + if (strlen(o2) > 32){ + printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\ + \n# Please check it out : %s\n",o2); + exit(EXIT_FAILURE); + } + else if ( (oligo_2 = buildPattern(o2,error_max)) == NULL) + exit(EXIT_FAILURE); + } /** * try to get the database name from environment variable @@ -226,7 +293,7 @@ int main(int argc, char **argv){ * check at leat one processing is asked * either patterns or taxid filters **/ - if ( p == NULL && restricted_taxid == NULL && ignored_taxid == NULL ) + if ( !p && !o1 && !o2 && restricted_taxid == NULL && ignored_taxid == NULL ) { errflag++; } @@ -267,8 +334,8 @@ int main(int argc, char **argv){ strcpy(orig,stream); - getLineContent(stream,seq); - + getLineContent(stream,seq,oligoseq_1,oligoseq_2); + /* -----------------------------------------------*/ /* is ignored if at least one option -i */ /* AND */ @@ -295,9 +362,18 @@ int main(int argc, char **argv){ /* match if no pattern or if pattern match current sequence */ /* ----------------------------------------------------------- */ is_matching = ( !p || (ispatternmatching(seq,pattern))); - - - good = (is_included && is_matching && !is_ignored); + + /* ---------------------------------------------------------------------------- */ + /* match if no direct oligo pattern or if pattern match current direct oligo */ + /* ---------------------------------------------------------------------------- */ + match_o1 = (!o1 || (ispatternmatching(oligoseq_1,oligo_1))); + + /* ------------------------------------------------------------------------------- */ + /* match if no revesrse oligo pattern or if pattern match current reverse oligo */ + /* ------------------------------------------------------------------------------- */ + match_o2 = (!o2 || (ispatternmatching(oligoseq_2,oligo_2))); + + good = (is_included && is_matching && !is_ignored && match_o1 && match_o2); if (v) good=!good;