From 6a8bf94854be40d64ed09a6b77f9185aa236f9f5 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 6 Jun 2007 10:14:29 +0000 Subject: [PATCH] git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@44 60f365c0-8329-0410-b2a4-ec073aeeaa1d --- src/ecofind_makefile | 22 ----- src/ecopcr.c | 181 +++++++++++++++++++------------------ src/libecoPCR/ecoError.c | 10 +- src/libecoPCR/ecoIOUtils.c | 27 +++--- src/libecoPCR/ecoPCR.h | 4 +- src/libecoPCR/ecofilter.c | 8 +- src/libecoPCR/ecoseq.c | 11 ++- 7 files changed, 134 insertions(+), 129 deletions(-) delete mode 100644 src/ecofind_makefile diff --git a/src/ecofind_makefile b/src/ecofind_makefile deleted file mode 100644 index 8b1cc88..0000000 --- a/src/ecofind_makefile +++ /dev/null @@ -1,22 +0,0 @@ - -CC=gcc -CFLAGS= -W -Wall -O2 -g -LDFLAGS= -lm -EXEC=ecofind -SRC= ecofind.c -OBJ= $(SRC:.c=.o) -LIBPATH= -LlibecoPCR/ -LIB= -lecoPCR - -MACHINE=MAC_OS_X - -all: $(EXEC) - -ecofind: $(OBJ) - $(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB) - -ecofind.o: $(SRC) - $(CC) -D$(MACHINE) -o $@ -c $< $(CFLAGS) - -clean: - rm -f *.o \ No newline at end of file diff --git a/src/ecopcr.c b/src/ecopcr.c index 2d7d777..d0a4939 100644 --- a/src/ecopcr.c +++ b/src/ecopcr.c @@ -52,7 +52,7 @@ static void PrintHelp() PP "datafile radical without any extension. For example /database/gbmam\n"); PP "------------------------------------------\n"); PP "Table result description : \n"); - PP "column 1 : \n"); + PP "column 1 : accession number\n"); PP "column 2 : sequence length\n"); PP "column 3 : taxonomic id\n"); PP "column 4 : rank\n"); @@ -70,7 +70,7 @@ static void PrintHelp() PP "column 16 : second oligonucleotide\n"); PP "column 17 : number of errors for the second strand\n"); PP "column 18 : amplification length\n"); - PP "column 19 : \n"); + PP "column 19 : sequence description\n"); PP "------------------------------------------\n"); PP "\n"); @@ -87,7 +87,7 @@ static void PrintHelp() static void ExitUsage(stat) int stat; { - PP "usage: ecoPCR [-1 oligo1] [-2 oligo2] [-l value] [-L value] [-e value] [-k] datafile\n"); + PP "usage: ecoPCR [-1 oligo1] [-2 oligo2] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-k] datafile\n"); PP "type \"ecoPCR -h\" for help\n"); if (stat) @@ -295,12 +295,12 @@ int main(int argc, char **argv) int32_t errj; int32_t *restricted_taxid = NULL; - int32_t *excluded_taxid = NULL; + int32_t *ignored_taxid = NULL; int32_t r=0; int32_t g=0; - while ((carg = getopt(argc, argv, "h1:2:l:L:e:E:r:k")) != -1) { + while ((carg = getopt(argc, argv, "h1:2:l:L:e:i:r:k")) != -1) { switch (carg) { /* -------------------- */ @@ -359,11 +359,11 @@ int main(int argc, char **argv) break; /* --------------------------------- */ - case 'E': /* stores the taxonomic id to ignore */ + case 'i': /* stores the taxonomic id to ignore */ /* --------------------------------- */ - excluded_taxid = ECOREALLOC(excluded_taxid,sizeof(int32_t)*(g+1), + ignored_taxid = ECOREALLOC(ignored_taxid,sizeof(int32_t)*(g+1), "Error on excluded_taxid reallocation"); - sscanf(optarg,"%d",&excluded_taxid[g]); + sscanf(optarg,"%d",&ignored_taxid[g]); g++; break; @@ -428,91 +428,98 @@ int main(int argc, char **argv) /** * check if current sequence should be ignored **/ - if ( (g > 0) && (eco_is_taxid_ignored(excluded_taxid, g, taxonomy->taxons->taxon[seq->taxid].taxid)) ) - goto next; - - /** - * check current sequence is included - **/ - if ( (r > 0) && (!eco_is_taxid_included(taxonomy, restricted_taxid, r, taxonomy->taxons->taxon[seq->taxid].taxid)) ) - goto next; + if ( (r == 0) || + (eco_is_taxid_included(taxonomy, + restricted_taxid, + r, + taxonomy->taxons->taxon[seq->taxid].taxid) + ) + ) + if ((g == 0) || + !(eco_is_taxid_included(taxonomy, + ignored_taxid, + g, + taxonomy->taxons->taxon[seq->taxid].taxid) + ) + ) + { - scname = taxonomy->taxons->taxon[seq->taxid].name; - strncpy(head,seq->SQ,10); - head[10]=0; - strncpy(tail,seq->SQ+seq->SQ_length-10,10); - tail[10]=0; - - apatseq=ecoseq2apatseq(seq,apatseq); + scname = taxonomy->taxons->taxon[seq->taxid].name; + strncpy(head,seq->SQ,10); + head[10]=0; + strncpy(tail,seq->SQ+seq->SQ_length-10,10); + tail[10]=0; - o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen); - o2cHits= 0; - - if (o1Hits) - { - stktmp = apatseq->hitpos[0]; - begin = stktmp->val[0] + o1->patlen; - - if (lmax) - length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen; - else - length= apatseq->seqlen - begin; + apatseq=ecoseq2apatseq(seq,apatseq); - o2cHits = ManberAll(apatseq,o2c,1,begin,length); - - if (o2cHits) - for (i=0; i < o1Hits;i++) - { - posi = apatseq->hitpos[0]->val[i]; - erri = apatseq->hiterr[0]->val[i]; - for (j=0; j < o2cHits; j++) - { - posj =apatseq->hitpos[1]->val[j] + o2c->patlen; - errj =apatseq->hiterr[1]->val[j]; - length=posj - posi + 1 - o1->patlen - o2->patlen; - - if ((!lmin || (length >= lmin)) && - (!lmax || (length <= lmax))) - printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy); - //printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname); - } - } - } - - o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen); - o1cHits= 0; - if (o2Hits) - { - stktmp = apatseq->hitpos[2]; - begin = stktmp->val[0] + o2->patlen; - - if (lmax) - length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen; - else - length= apatseq->seqlen - begin; + o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen); + o2cHits= 0; - o1cHits = ManberAll(apatseq,o1c,3,begin,length); - - if (o1cHits) - for (i=0; i < o2Hits;i++) + if (o1Hits) { - posi = apatseq->hitpos[2]->val[i]; - erri = apatseq->hiterr[2]->val[i]; - for (j=0; j < o1cHits; j++) - { - posj=apatseq->hitpos[3]->val[j] + o1c->patlen; - errj=apatseq->hiterr[3]->val[j]; - length=posj - posi + 1 - o1->patlen - o2->patlen; + stktmp = apatseq->hitpos[0]; + begin = stktmp->val[0] + o1->patlen; + + if (lmax) + length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen; + else + length= apatseq->seqlen - begin; - if ((!lmin || (length >= lmin)) && - (!lmax || (length <= lmax))) - printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy); - //printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname); - } - } - } + o2cHits = ManberAll(apatseq,o2c,1,begin,length); - next: + if (o2cHits) + for (i=0; i < o1Hits;i++) + { + posi = apatseq->hitpos[0]->val[i]; + erri = apatseq->hiterr[0]->val[i]; + for (j=0; j < o2cHits; j++) + { + posj =apatseq->hitpos[1]->val[j] + o2c->patlen; + errj =apatseq->hiterr[1]->val[j]; + length=posj - posi + 1 - o1->patlen - o2->patlen; + + if ((!lmin || (length >= lmin)) && + (!lmax || (length <= lmax))) + printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy); + //printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname); + } + } + } + + o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen); + o1cHits= 0; + if (o2Hits) + { + stktmp = apatseq->hitpos[2]; + begin = stktmp->val[0] + o2->patlen; + + if (lmax) + length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen; + else + length= apatseq->seqlen - begin; + + o1cHits = ManberAll(apatseq,o1c,3,begin,length); + + if (o1cHits) + for (i=0; i < o2Hits;i++) + { + posi = apatseq->hitpos[2]->val[i]; + erri = apatseq->hiterr[2]->val[i]; + for (j=0; j < o1cHits; j++) + { + posj=apatseq->hitpos[3]->val[j] + o1c->patlen; + errj=apatseq->hiterr[3]->val[j]; + length=posj - posi + 1 - o1->patlen - o2->patlen; + + if ((!lmin || (length >= lmin)) && + (!lmax || (length <= lmax))) + printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy); + //printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname); + } + } + } + + } /* End of taxonomic selection */ delete_ecoseq(seq); @@ -520,7 +527,7 @@ int main(int argc, char **argv) } ECOFREE(restricted_taxid, "Error: could not free restricted_taxid\n"); - ECOFREE(excluded_taxid, "Error: could not free excluded_taxid\n"); + ECOFREE(ignored_taxid, "Error: could not free excluded_taxid\n"); return 0; } diff --git a/src/libecoPCR/ecoError.c b/src/libecoPCR/ecoError.c index a52b9e9..00bbfa2 100644 --- a/src/libecoPCR/ecoError.c +++ b/src/libecoPCR/ecoError.c @@ -2,7 +2,15 @@ #include #include - +/* + * print the message given as argument and exit the program + * @param error error number + * @param message the text explaining what's going on + * @param filename the file source where the program failed + * @param linenumber the line where it has failed + * filename and linenumber are written at pre-processing + * time by a macro + */ void ecoError(int32_t error, const char* message, const char * filename, diff --git a/src/libecoPCR/ecoIOUtils.c b/src/libecoPCR/ecoIOUtils.c index 0f8cebc..8d7ce82 100644 --- a/src/libecoPCR/ecoIOUtils.c +++ b/src/libecoPCR/ecoIOUtils.c @@ -16,20 +16,19 @@ int32_t is_big_endian() - - - int32_t swap_int32_t(int32_t i) { return SWAPINT32(i); } - - - - - +/** + * Read part of the file + * @param *f the database + * @param recordSize the size to be read + * + * @return buffer + */ void *read_ecorecord(FILE *f,int32_t *recordSize) { static void *buffer =NULL; @@ -79,10 +78,14 @@ void *read_ecorecord(FILE *f,int32_t *recordSize) - - - - +/** + * Open the database and check it's readable + * @param filename name of the database (.sdx, .rdx, .tbx) + * @param sequencecount buffer - pointer to variable storing the number of occurence + * @param abort_on_open_error boolean to define the behaviour in case of error + * while opening the database + * @return FILE type + **/ FILE *open_ecorecorddb(const char *filename, int32_t *sequencecount, int32_t abort_on_open_error) diff --git a/src/libecoPCR/ecoPCR.h b/src/libecoPCR/ecoPCR.h index 08a325f..2229151 100644 --- a/src/libecoPCR/ecoPCR.h +++ b/src/libecoPCR/ecoPCR.h @@ -230,7 +230,7 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy); ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); -int eco_is_taxid_ignored(int *ignored_taxid, int tab_len, int taxid); -int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int *included_taxid, int tab_len, int taxid); +int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid); +int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid); #endif /*ECOPCR_H_*/ diff --git a/src/libecoPCR/ecofilter.c b/src/libecoPCR/ecofilter.c index d2a547f..52e6dbc 100644 --- a/src/libecoPCR/ecofilter.c +++ b/src/libecoPCR/ecofilter.c @@ -1,8 +1,8 @@ #include "ecoPCR.h" -int eco_is_taxid_ignored( int *ignored_taxid, - int tab_len, - int taxid) +int eco_is_taxid_ignored( int32_t *ignored_taxid, + int32_t tab_len, + int32_t taxid) { int i; for (i=0; i < tab_len; i++){ @@ -14,7 +14,7 @@ int eco_is_taxid_ignored( int *ignored_taxid, int eco_is_taxid_included( ecotaxonomy_t *taxonomy, - int *restricted_taxid, + int32_t *restricted_taxid, int32_t tab_len, int32_t taxid) { diff --git a/src/libecoPCR/ecoseq.c b/src/libecoPCR/ecoseq.c index cc25ad5..e40e919 100644 --- a/src/libecoPCR/ecoseq.c +++ b/src/libecoPCR/ecoseq.c @@ -79,7 +79,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC, } - +/** + * ?? used ?? + **/ FILE *open_ecoseqdb(const char *filename, int32_t *sequencecount) { @@ -140,6 +142,13 @@ ecoseq_t *readnext_ecoseq(FILE *f) return seq; } +/** + * Open the sequences database (.sdx file) + * @param prefix name of the database (radical without extension) + * @param index integer + * + * @return file object + */ FILE *open_seqfile(const char *prefix,int32_t index) { char filename_buffer[1024];