This commit is contained in:
2007-06-08 13:07:33 +00:00
parent c736f5b59d
commit 0e5b94ad80

289
src/ecogrep.c Normal file
View File

@ -0,0 +1,289 @@
#include "libecoPCR/ecoPCR.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#define VERSION "0.1"
void freememory(char **tab, int32_t num){
int32_t i;
for (i=0;i<num-1;i++){
ECOFREE(tab[i],"Error in freememory function");
}
}
int ispatternmatching(char **line, char **pattern, int32_t numpattern, int32_t error_max){
int i;
SeqPtr apatseq = NULL;
ecoseq_t *seq = NULL;
PatternPtr current_patt;
seq = new_ecoseq();
sscanf(line[4],"%d",&seq->taxid);
seq->AC = strdup(line[0]);
//seq->DE = strdup(line[]);
seq->SQ = strdup(line[18]);
seq->SQ_length = strlen(line[18]);
apatseq=ecoseq2apatseq(seq,apatseq);
for (i=0; i < numpattern ;i++){
current_patt = buildPattern(pattern[i],error_max);
if(ManberAll(apatseq,current_patt,0,0,apatseq->seqlen))
return 1;
}
return 0;
}
void printline(char **line, int32_t i){
int32_t k=0;
for (k=0; k < i; k++)
printf("%s |",line[k]);
printf("\n\n");
}
/* ----------------------------------------------- */
/* printout help */
/* ----------------------------------------------- */
#define PP fprintf(stdout,
static void PrintHelp()
{
PP "\n------------------------------------------\n");
PP " ecogrep Version %s\n", VERSION);
PP "------------------------------------------\n");
PP " synopsis : filtering ecoPCR result based on\n");
PP " taxonomic id filter and regular expression pattern\n");
PP " usage: ecogrep [options] database\n");
PP "------------------------------------------\n");
PP " options:\n");
PP " -d : [D]atabase containing taxonomic information\n\n");
PP " -f : [F]ile name : ecoPCR ouput file\n\n");
PP " -h : [H]elp - print <this> help\n\n");
PP " -i : [I]gnore taxonomic id\n\n");
PP " -r : [R]estrict taxomic id\n\n");
PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n");
PP "------------------------------------------\n");
PP " Pattern : oligonucleotide pattern\n");;
PP "------------------------------------------\n\n");
PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki");
PP "------------------------------------------\n\n");
}
#undef PP
/* ----------------------------------------------- */
/* printout usage and exit */
/* ----------------------------------------------- */
#define PP fprintf(stderr,
static void ExitUsage(stat)
int stat;
{
PP "usage: ecogrep [-d database] [-f filename] [-i taxid] [-r taxid] [-v] [-h] <pattern>\n");
PP "type \"ecogrep -h\" for help\n");
if (stat)
exit(stat);
}
#undef PP
/* ----------------------------------------------- */
/* MAIN */
/* ----------------------------------------------- */
int main(int argc, char **argv){
int32_t carg = 0;
int32_t r = 0; // number of restricted taxid
int32_t g = 0; // number of ignored taxid
int32_t v = 0;
int32_t p = 0; // number of pattern
int32_t i = 0;
int32_t errflag = 0;
int32_t error_max = 0;
int32_t matchingresult = 0;
ecotaxonomy_t *taxonomy;
char *filename = NULL; // stores the datafile name
char *database = NULL; // stores the database path (for taxonomy)
int32_t *restricted_taxid = NULL; // stores the restricted taxid
int32_t *ignored_taxid = NULL; // stores the ignored taxid
char **pattern = NULL; // stores the regex pattern
char *line[19] = {0}; // stores the line
FILE *file = NULL;
char *stream = ECOMALLOC(sizeof(char *)*10000,"error stream buffer allocation");
int current_taxid;
char *buffer;
while ((carg = getopt(argc, argv, "f:d:i:r:e:vh")) != -1) {
switch (carg) {
case 'f':
filename = ECOMALLOC(strlen(optarg)+1,
"Error on filename allocation");
strcpy(filename,optarg);
break;
case 'd':
database = ECOMALLOC(strlen(optarg)+1,
"Error on datafile allocation");
strcpy(database,optarg);
break;
case 'i':
ignored_taxid = ECOREALLOC( ignored_taxid,
sizeof(int32_t)*(g+1),
"Error on ignored_taxid reallocation");
sscanf(optarg,"%d",&ignored_taxid[g]);
g++;
break;
case 'r':
restricted_taxid = ECOREALLOC( restricted_taxid,
sizeof(int32_t)*(r+1),
"Error on restricted_taxid reallocation");
sscanf(optarg,"%d",&restricted_taxid[r]);
r++;
break;
case 'v':
v = 1;
break;
case 'h':
PrintHelp();
exit(0);
break;
case 'e':
sscanf(optarg,"%d",&error_max);
break;
case '?':
errflag++;
}
}
/**
* Get the leftover command line arguments back
* and check the pattern is not more than 32 character long
*/
pattern = ECOMALLOC(sizeof *pattern * (argc - optind), "Error in pattern allocation");
for (p=0 ; argc > optind ; optind++, p++){
if (strlen(argv[optind]) <= 32)
pattern[p] = strdup(argv[optind]);
else
{
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please correct this one : %s\n",argv[optind]);
exit(0);
}
}
/**
* try to get the database name from environment variable
* if no database name specified in the -d option
if (!database && getenv("ECOPCRDB"))
database = getenv("ECOPCRDB");
else
errflag++;
*/
/**
* check everything required is provided
*/
if (!filename ||
!database ||
(!p && restricted_taxid == NULL && ignored_taxid == NULL))
errflag++;
if (errflag)
ExitUsage(errflag);
/**
* Open the ecoPCR file
*/
if (filename)
{
if( (file = fopen(filename, "r")) == NULL)
ECOERROR(ECO_IO_ERROR,"Cannot open the file !");
}
/**
* Get the taxonomy back
*/
taxonomy = read_taxonomy(database);
/**
* Parse the stream
*/
while( fgets(stream, 10000, file) ){
if (stream[0]!= '#')
{
for( i=0, buffer = strtok(stream,"|");
buffer != NULL;
i++, buffer = strtok(NULL,"|"))
{
line[i] = strdup(buffer);
}
sscanf(line[4],"%d",&current_taxid);
if(!v) // normal mode
{
if ( (r > 0) && !(eco_is_taxid_included( taxonomy,
restricted_taxid,
r,
current_taxid))
)
continue;
if ( (g > 0) && (eco_is_taxid_included( taxonomy,
ignored_taxid,
g,
current_taxid))
)
continue;
}
else // v mode, invert ignore and restrict taxid
{
if ( (r > 0) && (eco_is_taxid_included( taxonomy,
restricted_taxid,
r,
current_taxid))
)
continue;
if ( (g > 0) && !(eco_is_taxid_included( taxonomy,
ignored_taxid,
g,
current_taxid))
)
continue;
}
if ( p == 0 || (ispatternmatching(line,pattern,p,error_max)))
{
printline(line,i);
matchingresult++;
}
}
}
printf("# %d matching result\n",matchingresult);
freememory(line,i);
freememory(pattern,p);
ECOFREE(pattern,"Error in free pattern");
ECOFREE(stream,"Error in free stream");
return 0;
}