git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@18 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
144
src/ecopcr.c
144
src/ecopcr.c
@ -15,97 +15,42 @@
|
||||
static void PrintHelp()
|
||||
{
|
||||
PP "------------------------------------------\n");
|
||||
PP " Apat Version %s\n", VERSION);
|
||||
PP " ecoPCR Version %s\n", VERSION);
|
||||
PP "------------------------------------------\n");
|
||||
PP "synopsis : pattern(s) searching program\n");
|
||||
PP "usage: apat [options] patfile datafile\n");
|
||||
PP "synopsis : searching for sequence and taxonomy hybridingwith given primers\n");
|
||||
PP "usage: ecoPCR [options] datafile\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "options:\n");
|
||||
PP "-a code : [A]lphabet encoding for pattern\n");
|
||||
PP " code is one of : \n");
|
||||
PP " dna: use IUPAC equivalences for dna/rna\n");
|
||||
PP " prot: use IUPAC equivalences for proteins\n");
|
||||
PP " alpha: no equivalences, just treat plain symbols\n");
|
||||
PP " note: the equivalences are used in pattern only\n");
|
||||
PP " *not* in sequence(s) (see note (4) below)\n");
|
||||
PP " dft: alpha\n");
|
||||
PP "-c : [C]ooccurences\n");
|
||||
PP " print patterns cooccurence matrix \n");
|
||||
PP " dft: off\n");
|
||||
PP "-h : [H]elp - print <this> help\n");
|
||||
PP "-m : [M]ultiple occurences\n");
|
||||
PP " see -q option \n");
|
||||
PP " dft: off\n");
|
||||
PP "-o file : [O]utput sequences\n");
|
||||
PP " additionaly output sequence(s) that match into\n");
|
||||
PP " 'file' in fasta format\n");
|
||||
PP " dft: off\n");
|
||||
PP "-p : no [Print] - don't printout hits\n");
|
||||
PP " when just counts are needed\n");
|
||||
PP " dft: off\n");
|
||||
PP "-q nn : [Quorum]\n");
|
||||
PP " printout result if at least nn\n");
|
||||
PP " different patterns are found on the sequence\n");
|
||||
PP " (with -m : at least nn different <hits>)\n");
|
||||
PP " dft: # of patterns read\n");
|
||||
PP "-s : no [Sort] - don't sort hits before printing\n");
|
||||
PP " usually hits are printed by increasing position\n");
|
||||
PP " this option will list them by pattern\n");
|
||||
PP " dft: off\n");
|
||||
PP "-t : [T]est sequence\n");
|
||||
PP " additionnaly check if sequences are uppercase\n");
|
||||
PP " this is mostly used for testing\n");
|
||||
PP " dft: off\n");
|
||||
PP "-u : [U]pper\n");
|
||||
PP " force lower->upper sequence conversion\n");
|
||||
PP " without this option lowercase symbols in sequence\n");
|
||||
PP " will not be considered to as matches\n");
|
||||
PP " dft: off\n");
|
||||
PP "-v : [V]erbose\n");
|
||||
PP " just display a kind of progress clock on stderr\n");
|
||||
PP " (this is only useful if you redirect stdout)\n");
|
||||
PP "\n");
|
||||
PP "patfile : pattern file (see below)\n");
|
||||
PP "datafile : database file (see below)\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "pattern file format :\n");
|
||||
PP " one pattern/line\n");
|
||||
PP " format : <pattern> <space> #errors\n");
|
||||
PP " <pattern> := pattern<symbol>\n");
|
||||
PP " or !pattern<symbol>\n");
|
||||
PP " or pattern<symbol>#\n");
|
||||
PP " or !pattern<symbol>#\n");
|
||||
PP " <symbol> := <letter>\n");
|
||||
PP " or [<letter>....<letter>]\n");
|
||||
PP " <letter> := uppercase letter (A-Z)\n");
|
||||
PP " <number> := a positive number indicates max number of mismatches\n");
|
||||
PP " a negative number indicates max number of mismatches or indels\n");
|
||||
PP " # means that no error is allowed at this position\n");
|
||||
PP " ! complement the <symbol>\n");
|
||||
PP " [...] means that all symbols within [] are allowed\n");
|
||||
PP " in addition IUPAC equivalences may be used as symbols\n");
|
||||
PP " with the -a option\n");
|
||||
PP "\n");
|
||||
PP "example: G[DE]S#[GIV]!HP![DE]# 1\n");
|
||||
PP "-1 : [FIRST] oligonucleotide for direct strand\n\n");
|
||||
PP "-2 : [SECOND] oligonucleotide for reverse strand\n\n");
|
||||
PP "-e : [E]rror \n");
|
||||
PP " : max error allowed by oligonucleotide\n\n");
|
||||
PP "-h : [H]elp - print <this> help\n\n");
|
||||
PP "-i : [I]gnore the given taxonomy id.\n");
|
||||
PP " taxonomy id are available using the ecofind program.\n");
|
||||
PP " see its help typing ecofind -h for more information.\n");
|
||||
PP "-k : [K]ingdom mode\n");
|
||||
PP " set the kingdom mode\n");
|
||||
PP " super kingdom mode by default.\n\n");
|
||||
PP "-l : minimum [L]ength\n");
|
||||
PP " define the minimum amplication length. \n\n");
|
||||
PP "-L : maximum [L]ength\n");
|
||||
PP " define the maximum amplicationlength. \n\n");
|
||||
PP "-r : [R]estricts the search to the given taxonomy id.\n");
|
||||
PP " taxonomy id are available using the ecofind program.\n");
|
||||
PP " see its help typing ecofind -h for more information.\n");
|
||||
PP "\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "datafile contains one or more sequences in\n");
|
||||
PP "Fasta format, with *uppercase* symbols \n");
|
||||
PP "\n");
|
||||
PP "datafile : to match the expected format, the database\n");
|
||||
PP "has to be formated first by the ecoPCRFormat.py program located.\n");
|
||||
PP "in the tools directory.\n");
|
||||
PP "ecoPCRFormat.py creates three file types :");
|
||||
PP " .sdx : contains the sequences\n");
|
||||
PP " .tdx : contains information concerning the taxonomy\n");
|
||||
PP " .rdx : contains the taxonomy rank\n\n");
|
||||
PP "ecoPCR needs all the file type. As a result, you have to write the\n");
|
||||
PP "datafile radical without any extension. For example /database/gbmam\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "note (1): the maximum number of patterns is %d\n", MAX_PATTERN);
|
||||
PP "\n");
|
||||
PP "note (2): the maximum length for one pattern is %d\n", MAX_PAT_LEN);
|
||||
PP "\n");
|
||||
PP "note (3): indels are still experimental and are :\n");
|
||||
PP " not handled gracefully with the # syntax\n");
|
||||
PP " and hits are not printed very nicely\n");
|
||||
PP "\n");
|
||||
PP "note (4): the IUPAC equivalences (-a option) are used\n");
|
||||
PP " in pattern only *not* in sequence(s).\n");
|
||||
PP " for instance GATN (with option -a dna) is equivalent to GAT[ACGT]\n");
|
||||
PP " and will match GATA/GATC/GATG/GATC but will not match GATN\n");
|
||||
PP " (nor NNNN) in sequence.\n");
|
||||
PP "\n");
|
||||
|
||||
}
|
||||
@ -332,10 +277,10 @@ int main(int argc, char **argv)
|
||||
|
||||
|
||||
while ((carg = getopt(argc, argv, "h1:2:l:L:e:k")) != -1) {
|
||||
|
||||
|
||||
switch (carg) {
|
||||
/* -------------------- */
|
||||
case '1': /* prenier oligo */
|
||||
case '1': /* first primer */
|
||||
/* -------------------- */
|
||||
oligo1 = ECOMALLOC(strlen(optarg)+1,
|
||||
"Error on oligo 1 allocation");
|
||||
@ -343,7 +288,7 @@ int main(int argc, char **argv)
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case '2': /* coocurence option */
|
||||
case '2': /* second primer */
|
||||
/* -------------------- */
|
||||
oligo2 = ECOMALLOC(strlen(optarg)+1,
|
||||
"Error on oligo 1 allocation");
|
||||
@ -358,15 +303,15 @@ int main(int argc, char **argv)
|
||||
exit(0);
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'l': /* lmin amplification */
|
||||
/* -------------------- */
|
||||
/* ------------------------- */
|
||||
case 'l': /* min amplification length */
|
||||
/* ------------------------- */
|
||||
sscanf(optarg,"%d",&lmin);
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'L': /* lmax amplification */
|
||||
/* -------------------- */
|
||||
/* -------------------------- */
|
||||
case 'L': /* max amplification length */
|
||||
/* -------------------------- */
|
||||
sscanf(optarg,"%d",&lmax);
|
||||
break;
|
||||
|
||||
@ -375,8 +320,9 @@ int main(int argc, char **argv)
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%d",&error_max);
|
||||
break;
|
||||
|
||||
case 'k': /* error max */
|
||||
|
||||
/* -------------------- */
|
||||
case 'k': /* set the kingdom mode */
|
||||
/* -------------------- */
|
||||
kingdom_mode = 1;
|
||||
break;
|
||||
@ -388,7 +334,9 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* check the path to the database is given as last argument
|
||||
*/
|
||||
if ((argc -= optind) != 1)
|
||||
errflag++;
|
||||
|
||||
@ -428,7 +376,7 @@ int main(int argc, char **argv)
|
||||
printf("#\n");
|
||||
|
||||
taxonomy = read_taxonomy(prefix);
|
||||
|
||||
|
||||
seq = ecoseq_iterator(prefix);
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user