From 45f27e8c875ffefb160d25553da81e780c68e804 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 5 Jun 2007 13:40:06 +0000 Subject: [PATCH] git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@37 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- src/ecopcr.c | 80 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 15 deletions(-) diff --git a/src/ecopcr.c b/src/ecopcr.c index 7a2788d..b00490f 100644 --- a/src/ecopcr.c +++ b/src/ecopcr.c @@ -17,7 +17,7 @@ static void PrintHelp() PP "------------------------------------------\n"); PP " ecoPCR Version %s\n", VERSION); PP "------------------------------------------\n"); - PP "synopsis : searching for sequence and taxonomy hybridingwith given primers\n"); + PP "synopsis : searching for sequence and taxonomy hybriding with given primers\n"); PP "usage: ecoPCR [options] datafile\n"); PP "------------------------------------------\n"); PP "options:\n"); @@ -51,6 +51,27 @@ static void PrintHelp() PP "ecoPCR needs all the file type. As a result, you have to write the\n"); PP "datafile radical without any extension. 
For example /database/gbmam\n"); PP "------------------------------------------\n"); + PP "Table result description : \n"); + PP "column 1 : \n"); + PP "column 2 : sequence length\n"); + PP "column 3 : taxonomic id\n"); + PP "column 4 : rank\n"); + PP "column 5 : species taxonomic id\n"); + PP "column 6 : scientific name\n"); + PP "column 7 : genus taxonomic id\n"); + PP "column 8 : genus name\n"); + PP "column 9 : family taxonomic id\n"); + PP "column 10 : family name\n"); + PP "column 11 : super kingdom taxonomic id\n"); + PP "column 12 : super kingdom name\n"); + PP "column 13 : strand (direct or reverse)\n"); + PP "column 14 : first oligonucleotide\n"); + PP "column 15 : number of errors for the first strand\n"); + PP "column 16 : second oligonucleotide\n"); + PP "column 17 : number of errors for the second strand\n"); + PP "column 18 : amplification length\n"); + PP "column 19 : \n"); + PP "------------------------------------------\n"); PP "\n"); } @@ -273,12 +294,17 @@ int main(int argc, char **argv) int32_t erri; int32_t errj; + int32_t *restricted_taxid = NULL; + int32_t *excluded_taxid = NULL; + int32_t r=0; + int32_t g=0; - while ((carg = getopt(argc, argv, "h1:2:l:L:e:k")) != -1) { + + while ((carg = getopt(argc, argv, "h1:2:l:L:e:E:r:k")) != -1) { switch (carg) { /* -------------------- */ - case '1': /* first primer */ + case '1': /* first primer */ /* -------------------- */ oligo1 = ECOMALLOC(strlen(optarg)+1, "Error on oligo 1 allocation"); @@ -286,7 +312,7 @@ int main(int argc, char **argv) break; /* -------------------- */ - case '2': /* second primer */ + case '2': /* second primer */ /* -------------------- */ oligo2 = ECOMALLOC(strlen(optarg)+1, "Error on oligo 1 allocation"); @@ -296,7 +322,6 @@ int main(int argc, char **argv) /* -------------------- */ case 'h': /* help */ /* -------------------- */ - PrintHelp(); exit(0); break; @@ -308,7 +333,7 @@ int main(int argc, char **argv) break; /* -------------------------- */ - case 'L': /* max 
amplification lenght */ + case 'L': /* max amplification length */ /* -------------------------- */ sscanf(optarg,"%d",&lmax); break; @@ -321,8 +346,25 @@ int main(int argc, char **argv) /* -------------------- */ case 'k': /* set the kingdom mode */ - /* -------------------- */ - kingdom_mode = 1; + kingdom_mode = 1; /* -------------------- */ + break; + + /* ------------------------------------------ */ + case 'r': /* stores the restricting search taxonomic id */ + /* ------------------------------------------ */ + restricted_taxid = ECOREALLOC(restricted_taxid,sizeof(int32_t)*(r+1), + "Error on restricted_taxid reallocation"); + sscanf(optarg,"%d",&restricted_taxid[r]); + r++; + break; + + /* --------------------------------- */ + case 'E': /* stores the taxonomic id to ignore */ + /* --------------------------------- */ + excluded_taxid = ECOREALLOC(excluded_taxid,sizeof(int32_t)*(g+1), + "Error on excluded_taxid reallocation"); + sscanf(optarg,"%d",&excluded_taxid[g]); + g++; break; /* -------------------- */ @@ -332,6 +374,7 @@ int main(int argc, char **argv) } } + /** * check the path to the database is given as last argument */ @@ -352,9 +395,7 @@ int main(int argc, char **argv) o1c = complementPattern(o1); o2c = complementPattern(o2); - - - printf("########\n"); + printf("#\n"); printf("# ecoPCR version %s\n",VERSION); printf("# direct strand oligo1 : %-32s ; oligo2c : %32s\n", o1->cpat,o2c->cpat); printf("# reverse strand oligo2 : %-32s ; oligo1c : %32s\n", o2->cpat,o1c->cpat); @@ -385,20 +426,29 @@ int main(int argc, char **argv) while(seq) { checkedSequence++; - + /** + * check if the sequence should be ignored + **/ + if ( (g > 0) && (eco_is_taxid_ignored(excluded_taxid, g, taxonomy->taxons->taxon[seq->taxid].taxid)) ) + goto next; + + /** + * check the sequence is included + **/ + if ( (r > 0) && (!eco_is_taxid_included(taxonomy, restricted_taxid, r, taxonomy->taxons->taxon[seq->taxid].taxid)) ) + goto next; + scname = 
taxonomy->taxons->taxon[seq->taxid].name; strncpy(head,seq->SQ,10); head[10]=0; strncpy(tail,seq->SQ+seq->SQ_length-10,10); tail[10]=0; - apatseq=ecoseq2apatseq(seq,apatseq); o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen); o2cHits= 0; - if (o1Hits) { stktmp = apatseq->hitpos[0]; @@ -463,7 +513,7 @@ int main(int argc, char **argv) } } - + next: delete_ecoseq(seq);