This commit is contained in:
2007-06-21 13:57:59 +00:00
parent 1c421f5903
commit 36d536305e

View File

@ -9,7 +9,7 @@
#define VERSION "0.1"
void getLineContent(char *stream, ecoseq_t *seq){
void getLineContent(char *stream, ecoseq_t *seq, ecoseq_t *oligoseq_1, ecoseq_t *oligoseq_2){
int i;
char *buffer;
@ -25,6 +25,14 @@ void getLineContent(char *stream, ecoseq_t *seq){
case 4:
sscanf(buffer,"%d",&seq->taxid);
break;
case 13:
oligoseq_1->SQ = strdup(buffer);
oligoseq_1->SQ_length = strlen(buffer);
break;
case 15:
oligoseq_2->SQ = strdup(buffer);
oligoseq_2->SQ_length = strlen(buffer);
break;
case 18:
seq->SQ = strdup(buffer);
seq->SQ_length = strlen(buffer);
@ -74,19 +82,22 @@ static void PrintHelp()
PP "------------------------------------------\n");
PP " synopsis : filtering ecoPCR result based on\n");
PP " taxonomic id filter and regular expression pattern\n");
PP " usage: ecogrep [options] database\n");
PP " usage: ecogrep [options] filename\n");
PP "------------------------------------------\n");
PP " options:\n");
PP " -d : [D]atabase containing taxonomic information\n\n");
PP " -p : [P]attern oligonucleotide pattern\n\n");
PP " -h : [H]elp - print <this> help\n\n");
PP " -i : [I]gnore taxonomic id\n\n");
PP " -r : [R]estrict taxomic id\n\n");
PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n");
PP "------------------------------------------\n");
PP "ecoPCR ouput file name\n");
PP " -1 : [FIRST] : compare the given pattern with direct strand oligonucleotide");
PP " -2 : [SECOND] : compare the given pattern with reverse strand oligonucleotide");
PP " -d : [D]atabase containing taxonomic information\n\n");
PP " -e : [E]rrors : max error allowed in pattern match (option-1, -2 and -p) (0 by default)\n\n");
PP " -p : [P]attern : oligonucleotide pattern\n\n");
PP " -h : [H]elp : print <this> help\n\n");
PP " -i : [I]gnore subtree under given taxonomic id\n\n");
PP " -r : [R]estrict search to subtree under given taxomic id\n\n");
PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n\n");
PP " argument:\n");
PP " ecoPCR ouput file name\n");
PP "------------------------------------------\n\n");
PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
PP "------------------------------------------\n\n");
}
@ -127,12 +138,20 @@ int main(int argc, char **argv){
int32_t matchingresult = 0; // stores number of matching result
ecotaxonomy_t *taxonomy; // stores the taxonomy
ecoseq_t *seq = NULL; // stores sequence info
ecoseq_t *oligoseq_1 = NULL; // stores the oligo_1 info
ecoseq_t *oligoseq_2 = NULL; // stores the oligo_2 info
char *p = NULL; // number of pattern
char *database = NULL; // stores the database path (for taxonomy)
PatternPtr pattern = NULL; // stores the build pattern
char *p = NULL; // pattern for sequence
PatternPtr pattern = NULL; // stores the build pattern for sequence
char *o1 = NULL; // pattern for direct strand oligo
PatternPtr oligo_1 = NULL; // stores the build pattern for direct strand oligo
char *o2 = NULL; // pattern for reverse strand oligo
PatternPtr oligo_2 = NULL; // stores the build pattern for reverse strand oligo
int32_t *restricted_taxid = NULL; // stores the restricted taxid
int32_t *ignored_taxid = NULL; // stores the ignored taxid
@ -143,15 +162,32 @@ int main(int argc, char **argv){
int is_ignored = 0;
int is_included = 0;
int is_matching = 0;
int match_o1 = 0;
int match_o2 = 0;
int good = 0;
seq = new_ecoseq();
oligoseq_1 = new_ecoseq();
oligoseq_2 = new_ecoseq();
/**
* Parse commande line options
**/
while ((carg = getopt(argc, argv, "p:d:i:r:e:vh")) != -1) {
while ((carg = getopt(argc, argv, "1:2:p:d:i:r:e:vh")) != -1) {
switch (carg) {
case '1':
o1 = ECOMALLOC(strlen(optarg)+1,
"Error on o1 allocation");
strcpy(o1,optarg);
break;
case '2':
o2 = ECOMALLOC(strlen(optarg)+1,
"Error on o2 allocation");
strcpy(o2,optarg);
break;
case 'd':
database = ECOMALLOC(strlen(optarg)+1,
"Error on datafile allocation");
@ -199,17 +235,48 @@ int main(int argc, char **argv){
}
/**
* Check pattern length and build it in PatternPtr format
* Check sequence pattern length and build it in PatternPtr format
**/
if(p && strlen(p) > 32)
if(p)
{
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",p);
exit(EXIT_FAILURE);
}
else if (p)
if ( (pattern = buildPattern(p,error_max)) == NULL)
if (strlen(p) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",p);
exit(EXIT_FAILURE);
}
else if ( (pattern = buildPattern(p,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* Check o1 pattern length and build it in PatternPtr format
**/
if(o1)
{
if (strlen(o1) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",o1);
exit(EXIT_FAILURE);
}
else if ( (oligo_1 = buildPattern(o1,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* Check o2 pattern length and build it in PatternPtr format
**/
if(o2)
{
if (strlen(o2) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",o2);
exit(EXIT_FAILURE);
}
else if ( (oligo_2 = buildPattern(o2,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* try to get the database name from environment variable
@ -226,7 +293,7 @@ int main(int argc, char **argv){
* check at leat one processing is asked
* either patterns or taxid filters
**/
if ( p == NULL && restricted_taxid == NULL && ignored_taxid == NULL )
if ( !p && !o1 && !o2 && restricted_taxid == NULL && ignored_taxid == NULL )
{
errflag++;
}
@ -267,8 +334,8 @@ int main(int argc, char **argv){
strcpy(orig,stream);
getLineContent(stream,seq);
getLineContent(stream,seq,oligoseq_1,oligoseq_2);
/* -----------------------------------------------*/
/* is ignored if at least one option -i */
/* AND */
@ -295,9 +362,18 @@ int main(int argc, char **argv){
/* match if no pattern or if pattern match current sequence */
/* ----------------------------------------------------------- */
is_matching = ( !p || (ispatternmatching(seq,pattern)));
good = (is_included && is_matching && !is_ignored);
/* ---------------------------------------------------------------------------- */
/* match if no direct oligo pattern or if pattern match current direct oligo */
/* ---------------------------------------------------------------------------- */
match_o1 = (!o1 || (ispatternmatching(oligoseq_1,oligo_1)));
/* ------------------------------------------------------------------------------- */
/* match if no revesrse oligo pattern or if pattern match current reverse oligo */
/* ------------------------------------------------------------------------------- */
match_o2 = (!o2 || (ispatternmatching(oligoseq_2,oligo_2)));
good = (is_included && is_matching && !is_ignored && match_o1 && match_o2);
if (v)
good=!good;