From 22216dc3a0d56765a9e7127d46e4afbca42f27a5 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 1 Jun 2007 14:16:06 +0000 Subject: [PATCH] git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@18 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- src/ecopcr.c | 144 ++++++++++++++++----------------------------------- 1 file changed, 46 insertions(+), 98 deletions(-) diff --git a/src/ecopcr.c b/src/ecopcr.c index 20a9bce..f6a168e 100644 --- a/src/ecopcr.c +++ b/src/ecopcr.c @@ -15,97 +15,42 @@ static void PrintHelp() { PP "------------------------------------------\n"); - PP " Apat Version %s\n", VERSION); + PP " ecoPCR Version %s\n", VERSION); PP "------------------------------------------\n"); - PP "synopsis : pattern(s) searching program\n"); - PP "usage: apat [options] patfile datafile\n"); + PP "synopsis : searching for sequence and taxonomy hybridingwith given primers\n"); + PP "usage: ecoPCR [options] datafile\n"); PP "------------------------------------------\n"); PP "options:\n"); - PP "-a code : [A]lphabet encoding for pattern\n"); - PP " code is one of : \n"); - PP " dna: use IUPAC equivalences for dna/rna\n"); - PP " prot: use IUPAC equivalences for proteins\n"); - PP " alpha: no equivalences, just treat plain symbols\n"); - PP " note: the equivalences are used in pattern only\n"); - PP " *not* in sequence(s) (see note (4) below)\n"); - PP " dft: alpha\n"); - PP "-c : [C]ooccurences\n"); - PP " print patterns cooccurence matrix \n"); - PP " dft: off\n"); - PP "-h : [H]elp - print help\n"); - PP "-m : [M]ultiple occurences\n"); - PP " see -q option \n"); - PP " dft: off\n"); - PP "-o file : [O]utput sequences\n"); - PP " additionaly output sequence(s) that match into\n"); - PP " 'file' in fasta format\n"); - PP " dft: off\n"); - PP "-p : no [Print] - don't printout hits\n"); - PP " when just counts are needed\n"); - PP " dft: off\n"); - PP "-q nn : [Quorum]\n"); - PP " printout result if at least nn\n"); - PP " different patterns are found on the sequence\n"); - PP " (with -m : at least nn different )\n"); - PP " dft: # of patterns read\n"); - PP "-s : no [Sort] - don't sort hits before printing\n"); - PP " usually hits are printed by increasing position\n"); - PP " this option will list them by pattern\n"); - PP " dft: off\n"); - PP "-t : [T]est sequence\n"); - PP " additionnaly check if sequences are uppercase\n"); - PP " this is mostly used for testing\n"); - PP " dft: off\n"); - PP "-u : [U]pper\n"); - PP " force lower->upper sequence conversion\n"); - PP " without this option lowercase symbols in sequence\n"); - PP " will not be considered to as matches\n"); - PP " dft: off\n"); - PP "-v : [V]erbose\n"); - PP " just display a kind of progress clock on stderr\n"); - PP " (this is only useful if you redirect stdout)\n"); - PP "\n"); - PP "patfile : pattern file (see below)\n"); - PP "datafile : database file (see below)\n"); - PP "------------------------------------------\n"); - PP "pattern file format :\n"); - PP " one pattern/line\n"); - PP " format : #errors\n"); - PP " := pattern\n"); - PP " or !pattern\n"); - PP " or pattern#\n"); - PP " or !pattern#\n"); - PP " := \n"); - PP " or [....]\n"); - PP " := uppercase letter (A-Z)\n"); - PP " := a positive number indicates max number of mismatches\n"); - PP " a negative number indicates max number of mismatches or indels\n"); - PP " # means that no error is allowed at this position\n"); - PP " ! complement the \n"); - PP " [...] means that all symbols within [] are allowed\n"); - PP " in addition IUPAC equivalences may be used as symbols\n"); - PP " with the -a option\n"); - PP "\n"); - PP "example: G[DE]S#[GIV]!HP![DE]# 1\n"); + PP "-1 : [FIRST] oligonucleotide for direct strand\n\n"); + PP "-2 : [SECOND] oligonucleotide for reverse strand\n\n"); + PP "-e : [E]rror \n"); + PP " : max error allowed by oligonucleotide\n\n"); + PP "-h : [H]elp - print help\n\n"); + PP "-i : [I]gnore the given taxonomy id.\n"); + PP " taxonomy id are available using the ecofind program.\n"); + PP " see its help typing ecofind -h for more information.\n"); + PP "-k : [K]ingdom mode\n"); + PP " set the kingdom mode\n"); + PP " super kingdom mode by default.\n\n"); + PP "-l : minimum [L]ength\n"); + PP " define the minimum amplication length. \n\n"); + PP "-L : maximum [L]ength\n"); + PP " define the maximum amplicationlength. \n\n"); + PP "-r : [R]estricts the search to the given taxonomy id.\n"); + PP " taxonomy id are available using the ecofind program.\n"); + PP " see its help typing ecofind -h for more information.\n"); PP "\n"); PP "------------------------------------------\n"); - PP "datafile contains one or more sequences in\n"); - PP "Fasta format, with *uppercase* symbols \n"); - PP "\n"); + PP "datafile : to match the expected format, the database\n"); + PP "has to be formated first by the ecoPCRFormat.py program located.\n"); + PP "in the tools directory.\n"); + PP "ecoPCRFormat.py creates three file types :"); + PP " .sdx : contains the sequences\n"); + PP " .tdx : contains information concerning the taxonomy\n"); + PP " .rdx : contains the taxonomy rank\n\n"); + PP "ecoPCR needs all the file type. As a result, you have to write the\n"); + PP "datafile radical without any extension. For example /database/gbmam\n"); PP "------------------------------------------\n"); - PP "note (1): the maximum number of patterns is %d\n", MAX_PATTERN); - PP "\n"); - PP "note (2): the maximum length for one pattern is %d\n", MAX_PAT_LEN); - PP "\n"); - PP "note (3): indels are still experimental and are :\n"); - PP " not handled gracefully with the # syntax\n"); - PP " and hits are not printed very nicely\n"); - PP "\n"); - PP "note (4): the IUPAC equivalences (-a option) are used\n"); - PP " in pattern only *not* in sequence(s).\n"); - PP " for instance GATN (with option -a dna) is equivalent to GAT[ACGT]\n"); - PP " and will match GATA/GATC/GATG/GATC but will not match GATN\n"); - PP " (nor NNNN) in sequence.\n"); PP "\n"); } @@ -332,10 +277,10 @@ int main(int argc, char **argv) while ((carg = getopt(argc, argv, "h1:2:l:L:e:k")) != -1) { - + switch (carg) { /* -------------------- */ - case '1': /* prenier oligo */ + case '1': /* first primer */ /* -------------------- */ oligo1 = ECOMALLOC(strlen(optarg)+1, "Error on oligo 1 allocation"); @@ -343,7 +288,7 @@ int main(int argc, char **argv) break; /* -------------------- */ - case '2': /* coocurence option */ + case '2': /* second primer */ /* -------------------- */ oligo2 = ECOMALLOC(strlen(optarg)+1, "Error on oligo 1 allocation"); @@ -358,15 +303,15 @@ int main(int argc, char **argv) exit(0); break; - /* -------------------- */ - case 'l': /* lmin amplification */ - /* -------------------- */ + /* ------------------------- */ + case 'l': /* min amplification lenght */ + /* ------------------------- */ sscanf(optarg,"%d",&lmin); break; - /* -------------------- */ - case 'L': /* lmax amplification */ - /* -------------------- */ + /* -------------------------- */ + case 'L': /* max amplification lenght */ + /* -------------------------- */ sscanf(optarg,"%d",&lmax); break; @@ -375,8 +320,9 @@ int main(int argc, char **argv) /* -------------------- */ sscanf(optarg,"%d",&error_max); break; - - case 'k': /* error max */ + + /* -------------------- */ + case 'k': /* set the kingdom mode */ /* -------------------- */ kingdom_mode = 1; break; @@ -388,7 +334,9 @@ int main(int argc, char **argv) } } - + /** + * check the path to the database is given as last argument + */ if ((argc -= optind) != 1) errflag++; @@ -428,7 +376,7 @@ int main(int argc, char **argv) printf("#\n"); taxonomy = read_taxonomy(prefix); - + seq = ecoseq_iterator(prefix);