This commit is contained in:
2007-06-14 13:01:41 +00:00
parent 087ee70620
commit 9f4aa93249

View File

@ -6,8 +6,41 @@
#include <stdlib.h> #include <stdlib.h>
#include <sys/stat.h> #include <sys/stat.h>
/* Minimal boolean type for this file: FALSE is 0, TRUE is 1. */
typedef enum { FALSE = 0, TRUE = 1 } BOOL;
#define VERSION "0.1" #define VERSION "0.1"
/**
 * Extracts the fields ecogrep needs from one '|'-separated record.
 *
 * The record is tokenized in place with strtok(), so `stream` is modified
 * by this call. strtok() treats a run of consecutive '|' as a single
 * separator, so empty fields are not counted when numbering fields.
 *
 *   - field 0  is duplicated into seq->AC
 *   - field 4  is parsed as a decimal integer into seq->taxid
 *   - field 18 is duplicated into seq->SQ and its length stored in
 *              seq->SQ_length
 *
 * A member whose field is absent from the record is left untouched.
 * Strings already held in seq->AC / seq->SQ are released before being
 * replaced, so the same `seq` can be reused for every line of a file
 * without leaking the previous line's copies.
 *
 * @param	stream	NUL-terminated record; overwritten by strtok()
 * @param	seq	sequence record to fill
 **/
void getLineContent(char *stream, ecoseq_t *seq){
	int   i;
	char *buffer;

	/* strtok(NULL, ...) would silently resume a previous tokenization */
	if (stream == NULL || seq == NULL)
		return;

	for( i=0, buffer = strtok(stream,"|");
	     buffer != NULL;
	     i++, buffer = strtok(NULL,"|"))
	{
		switch (i) {
			case 0:
				/* NOTE(review): assumes seq->AC is NULL or a heap string,
				 * i.e. that new_ecoseq() zero-initialises the record - confirm */
				free(seq->AC);
				seq->AC = strdup(buffer);
				break;
			case 4:
				sscanf(buffer,"%d",&seq->taxid);
				break;
			case 18:
				/* NOTE(review): same assumption as for seq->AC above */
				free(seq->SQ);
				seq->SQ = strdup(buffer);
				seq->SQ_length = strlen(buffer);
				break;
			default:
				break;
		}
	}
}
void freememory(char **tab, int32_t num){ void freememory(char **tab, int32_t num){
int32_t i; int32_t i;
for (i=0;i<num-1;i++){ for (i=0;i<num-1;i++){
@ -24,39 +57,14 @@ void freememory(char **tab, int32_t num){
* *
* @return int 1 if a pattern match, else 0 * @return int 1 if a pattern match, else 0
**/ **/
int ispatternmatching(char **line, char **pattern, int32_t numpattern, int32_t error_max){ int ispatternmatching(ecoseq_t *seq, PatternPtr pattern){
int i; if (pattern != NULL)
SeqPtr apatseq = NULL; {
ecoseq_t *seq = NULL; SeqPtr apatseq = NULL;
PatternPtr current_patt; apatseq=ecoseq2apatseq(seq,apatseq);
return ManberAll(apatseq,pattern,0,0,apatseq->seqlen) > 0;
seq = new_ecoseq();
sscanf(line[4],"%d",&seq->taxid);
seq->AC = strdup(line[0]);
seq->SQ = strdup(line[18]);
seq->SQ_length = strlen(line[18]);
apatseq=ecoseq2apatseq(seq,apatseq);
for (i=0; i < numpattern ;i++){
current_patt = buildPattern(pattern[i],error_max);
if(ManberAll(apatseq,current_patt,0,0,apatseq->seqlen))
return 1;
} }
return 0; else return 0;
}
/**
*returns the result on standard output
* @param line array containing sequence information
* @param i length of line
**/
void printline(char **line, int32_t i){
int32_t k=0;
for (k=0; k < i; k++)
printf("%s |",line[k]);
printf("\n\n");
} }
/* ----------------------------------------------- */ /* ----------------------------------------------- */
@ -75,13 +83,13 @@ static void PrintHelp()
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP " options:\n"); PP " options:\n");
PP " -d : [D]atabase containing taxonomic information\n\n"); PP " -d : [D]atabase containing taxonomic information\n\n");
PP " -f : [F]ile name : ecoPCR ouput file\n\n"); PP " -p : [P]attern oligonucleotide pattern\n\n");
PP " -h : [H]elp - print <this> help\n\n"); PP " -h : [H]elp - print <this> help\n\n");
PP " -i : [I]gnore taxonomic id\n\n"); PP " -i : [I]gnore taxonomic id\n\n");
PP " -r : [R]estrict taxomic id\n\n"); PP " -r : [R]estrict taxomic id\n\n");
PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n"); PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n");
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP " Pattern : oligonucleotide pattern\n"); PP "ecoPCR ouput file name\n");
PP "------------------------------------------\n\n"); PP "------------------------------------------\n\n");
PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki\n"); PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki\n");
PP "------------------------------------------\n\n"); PP "------------------------------------------\n\n");
@ -98,7 +106,7 @@ static void PrintHelp()
static void ExitUsage(stat) static void ExitUsage(stat)
int stat; int stat;
{ {
PP "usage: ecogrep [-d database] [-f filename] [-i taxid] [-r taxid] [-v] [-h] <pattern>\n"); PP "usage: ecogrep [-d database] [-p pattern] [-i taxid] [-r taxid] [-v] [-h] <file name>\n");
PP "type \"ecogrep -h\" for help\n"); PP "type \"ecogrep -h\" for help\n");
if (stat) if (stat)
@ -111,41 +119,44 @@ static void ExitUsage(stat)
/* MAIN */ /* MAIN */
/* ----------------------------------------------- */ /* ----------------------------------------------- */
#define LINE_BUFF_SIZE 10000
int main(int argc, char **argv){ int main(int argc, char **argv){
int32_t carg = 0; int32_t carg = 0;
int32_t r = 0; // number of restricted taxid int32_t r = 0; // number of restricted taxid
int32_t g = 0; // number of ignored taxid int32_t i = 0; // number of ignored taxid
int32_t v = 0; // stores if -v mode is active int32_t v = 0; // stores if -v mode is active
int32_t p = 0; // number of pattern int32_t k = 0; // file counter
int32_t i = 0; int32_t errflag = 0;
int32_t errflag = 0;
int32_t error_max = 0; // stores the error rate allowed by the user int32_t error_max = 0; // stores the error rate allowed by the user
int32_t matchingresult = 0; // stores number of matching result int32_t matchingresult = 0; // stores number of matching result
ecotaxonomy_t *taxonomy; // stores the taxonomy ecotaxonomy_t *taxonomy; // stores the taxonomy
ecoseq_t *seq = NULL; // stores sequence info
char *p = NULL; // number of pattern
char *database = NULL; // stores the database path (for taxonomy) char *database = NULL; // stores the database path (for taxonomy)
char **pattern = NULL; // stores the regex pattern PatternPtr pattern = NULL; // stores the build pattern
char *line[19] = {0}; // stores the line
int32_t *restricted_taxid = NULL; // stores the restricted taxid int32_t *restricted_taxid = NULL; // stores the restricted taxid
int32_t *ignored_taxid = NULL; // stores the ignored taxid int32_t *ignored_taxid = NULL; // stores the ignored taxid
int32_t current_taxid;
FILE *file = NULL; // stores the data stream, stdin by default FILE *file = NULL; // stores the data stream, stdin by default
char *stream = ECOMALLOC(sizeof(char *)*10000,"error stream buffer allocation"); char *stream = ECOMALLOC(sizeof(char *)*LINE_BUFF_SIZE,"error stream buffer allocation");
char *buffer; char *orig = ECOMALLOC(sizeof(char *)*LINE_BUFF_SIZE,"error orig buffer allocation");
int is_ignored = 0;
int is_included = 0;
int is_matching = 0;
seq = new_ecoseq();
/** /**
* Parse command line options * Parse command line options
**/ **/
while ((carg = getopt(argc, argv, "f:d:i:r:e:vh")) != -1) { while ((carg = getopt(argc, argv, "p:d:i:r:e:vh")) != -1) {
switch (carg) { switch (carg) {
case 'f':
if ( (file = fopen(optarg, "r")) == NULL)
errflag++;
break;
case 'd': case 'd':
database = ECOMALLOC(strlen(optarg)+1, database = ECOMALLOC(strlen(optarg)+1,
"Error on datafile allocation"); "Error on datafile allocation");
@ -154,10 +165,10 @@ int main(int argc, char **argv){
case 'i': case 'i':
ignored_taxid = ECOREALLOC( ignored_taxid, ignored_taxid = ECOREALLOC( ignored_taxid,
sizeof(int32_t)*(g+1), sizeof(int32_t)*(i+1),
"Error on ignored_taxid reallocation"); "Error on ignored_taxid reallocation");
sscanf(optarg,"%d",&ignored_taxid[g]); sscanf(optarg,"%d",&ignored_taxid[i]);
g++; i++;
break; break;
case 'r': case 'r':
@ -178,41 +189,33 @@ int main(int argc, char **argv){
break; break;
case 'e': case 'e':
sscanf(optarg,"%d",&error_max); sscanf(optarg,"%d",&error_max);
break; break;
case 'p':
p = ECOMALLOC(strlen(optarg)+1,
"Error on pattern allocation");
strcpy(p,optarg);
break;
case '?': case '?':
errflag++; errflag++;
} }
} }
/** /**
* Get the left-over command line arguments back * Check pattern length and build it in PatternPtr format
* and check the pattern is not more than 32 character long
*/
pattern = ECOMALLOC(sizeof *pattern * (argc - optind), "Error in pattern allocation");
for (p=0 ; argc > optind ; optind++, p++){
if (strlen(argv[optind]) <= 32)
pattern[p] = strdup(argv[optind]);
else
{
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",argv[optind]);
exit(0);
}
}
/**
* check standard input if no file name given in -f option
**/ **/
if (file == NULL) if(p && strlen(p) > 32)
{ {
if (isatty(fileno(stdin))) printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
errflag++; \n# Please check it out : %s\n",p);
else exit(EXIT_FAILURE);
file = stdin; }
} else if (p)
if ( (pattern = buildPattern(p,error_max)) == NULL)
exit(EXIT_FAILURE);
/** /**
* try to get the database name from environment variable * try to get the database name from environment variable
* if no database name specified in the -d option * if no database name specified in the -d option
@ -228,9 +231,10 @@ int main(int argc, char **argv){
* check at least one processing is asked * check at least one processing is asked
* either patterns or taxid filters * either patterns or taxid filters
*/ */
if ( !p && restricted_taxid == NULL && ignored_taxid == NULL ) if ( p == NULL && restricted_taxid == NULL && ignored_taxid == NULL )
{
errflag++; errflag++;
}
if (errflag) if (errflag)
ExitUsage(errflag); ExitUsage(errflag);
@ -241,73 +245,93 @@ int main(int argc, char **argv){
/** /**
* Parse the stream * Parse the stream
*/ */
while( fgets(stream, 10000, file) != NULL ){ for (k=0 ; argc >= optind ; optind++, k++){
if (stream[0]!= '#') matchingresult = 0;
{
for( i=0, buffer = strtok(stream,"|"); if ( (file = fopen(argv[optind], "r")) == NULL)
buffer != NULL; {
i++, buffer = strtok(NULL,"|")) if (isatty(fileno(stdin)) == 0)
{ {
printf("<EFBFBD><EFBFBD> %s\n",buffer); file = stdin;
line[i] = strdup(buffer); printf("# Processing standard input...\n");
} }
else
sscanf(line[4],"%d",&current_taxid); break;
}
if(!v) // normal mode else
{ printf("# Processing %s...\n",argv[optind]);
if ( (r > 0) && !(eco_is_taxid_included( taxonomy,
restricted_taxid, while( fgets(stream, LINE_BUFF_SIZE, file) != NULL ){
r,
current_taxid)) if (stream[0]!= '#')
) {
continue;
stream[LINE_BUFF_SIZE-1]=0;
if ( (g > 0) && (eco_is_taxid_included( taxonomy,
ignored_taxid, strcpy(orig,stream);
g,
current_taxid)) getLineContent(stream,seq);
)
continue; /* -----------------------------------------------*/
} /* ignored if : */
/* - v mode and no ignored */
else // v mode, invert ignore and restrict options /* OR */
{ /* - at least one -i option used */
if ( (r > 0) && (eco_is_taxid_included( taxonomy, /* AND */
restricted_taxid, /* - -i option is parent of current taxid */
r, /* -----------------------------------------------*/
current_taxid)) is_ignored = ( (v && i==0) ||
) ( (i > 0) && (eco_is_taxid_included( taxonomy,
continue; ignored_taxid,
if ( (g > 0) && !(eco_is_taxid_included( taxonomy, i,
ignored_taxid, seq->taxid))
g, )
current_taxid)) );
)
continue; /* -----------------------------------------------*/
} /* included if : */
/* - normal mode and no restriction */
if ( p == 0 || (ispatternmatching(line,pattern,p,error_max))) /* OR */
{ /* - is -r option is parent of current taxid */
printline(line,i); /* -----------------------------------------------*/
matchingresult++; is_included = ( (!v && (r == 0)) || (eco_is_taxid_included( taxonomy,
} restricted_taxid,
} r,
seq->taxid))
);
/* -----------------------------------------------*/
/* match if no pattern or if function return 1 */
/* -----------------------------------------------*/
is_matching = ( (!v && !p) || (ispatternmatching(seq,pattern)));
if (
(!v && (!is_ignored && is_included && is_matching) ) ||
( v && (is_ignored && !is_included && !is_matching) )
)
{
printf("%s",orig);
matchingresult++;
}
}
}
if ( file != stdin )
fclose(file);
printf("# %d matching result(s)\n#\n",matchingresult);
} }
printf("# %d matching result\n",matchingresult);
/** /**
* clean, close and free before leaving * clean and free before leaving
**/ **/
if ( file != stdin ) ECOFREE(orig,"Error in free orig");
fclose(file);
freememory(line,i);
freememory(pattern,p);
ECOFREE(pattern,"Error in free pattern");
ECOFREE(stream,"Error in free stream"); ECOFREE(stream,"Error in free stream");
ECOFREE(ignored_taxid,"Error in free stream");
ECOFREE(restricted_taxid,"Error in free stream");
return 0; return 0;
} }