This commit is contained in:
2007-06-06 10:14:29 +00:00
parent 3117f978c2
commit 6a8bf94854
7 changed files with 134 additions and 129 deletions

View File

@ -1,22 +0,0 @@
CC=gcc
CFLAGS= -W -Wall -O2 -g
LDFLAGS= -lm
EXEC=ecofind
SRC= ecofind.c
OBJ= $(SRC:.c=.o)
LIBPATH= -LlibecoPCR/
LIB= -lecoPCR
MACHINE=MAC_OS_X
all: $(EXEC)
ecofind: $(OBJ)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
ecofind.o: $(SRC)
$(CC) -D$(MACHINE) -o $@ -c $< $(CFLAGS)
clean:
rm -f *.o

View File

@ -52,7 +52,7 @@ static void PrintHelp()
PP "datafile radical without any extension. For example /database/gbmam\n");
PP "------------------------------------------\n");
PP "Table result description : \n");
PP "column 1 : \n");
PP "column 1 : accession number\n");
PP "column 2 : sequence length\n");
PP "column 3 : taxonomic id\n");
PP "column 4 : rank\n");
@ -70,7 +70,7 @@ static void PrintHelp()
PP "column 16 : second oligonucleotide\n");
PP "column 17 : number of errors for the second strand\n");
PP "column 18 : amplification length\n");
PP "column 19 : \n");
PP "column 19 : sequence description\n");
PP "------------------------------------------\n");
PP "\n");
@ -87,7 +87,7 @@ static void PrintHelp()
static void ExitUsage(stat)
int stat;
{
PP "usage: ecoPCR [-1 oligo1] [-2 oligo2] [-l value] [-L value] [-e value] [-k] datafile\n");
PP "usage: ecoPCR [-1 oligo1] [-2 oligo2] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-k] datafile\n");
PP "type \"ecoPCR -h\" for help\n");
if (stat)
@ -295,12 +295,12 @@ int main(int argc, char **argv)
int32_t errj;
int32_t *restricted_taxid = NULL;
int32_t *excluded_taxid = NULL;
int32_t *ignored_taxid = NULL;
int32_t r=0;
int32_t g=0;
while ((carg = getopt(argc, argv, "h1:2:l:L:e:E:r:k")) != -1) {
while ((carg = getopt(argc, argv, "h1:2:l:L:e:i:r:k")) != -1) {
switch (carg) {
/* -------------------- */
@ -359,11 +359,11 @@ int main(int argc, char **argv)
break;
/* --------------------------------- */
case 'E': /* stores the taxonomic id to ignore */
case 'i': /* stores the taxonomic id to ignore */
/* --------------------------------- */
excluded_taxid = ECOREALLOC(excluded_taxid,sizeof(int32_t)*(g+1),
ignored_taxid = ECOREALLOC(ignored_taxid,sizeof(int32_t)*(g+1),
"Error on excluded_taxid reallocation");
sscanf(optarg,"%d",&excluded_taxid[g]);
sscanf(optarg,"%d",&ignored_taxid[g]);
g++;
break;
@ -428,91 +428,98 @@ int main(int argc, char **argv)
/**
* check if current sequence should be ignored
**/
if ( (g > 0) && (eco_is_taxid_ignored(excluded_taxid, g, taxonomy->taxons->taxon[seq->taxid].taxid)) )
goto next;
/**
* check current sequence is included
**/
if ( (r > 0) && (!eco_is_taxid_included(taxonomy, restricted_taxid, r, taxonomy->taxons->taxon[seq->taxid].taxid)) )
goto next;
if ( (r == 0) ||
(eco_is_taxid_included(taxonomy,
restricted_taxid,
r,
taxonomy->taxons->taxon[seq->taxid].taxid)
)
)
if ((g == 0) ||
!(eco_is_taxid_included(taxonomy,
ignored_taxid,
g,
taxonomy->taxons->taxon[seq->taxid].taxid)
)
)
{
scname = taxonomy->taxons->taxon[seq->taxid].name;
strncpy(head,seq->SQ,10);
head[10]=0;
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
tail[10]=0;
apatseq=ecoseq2apatseq(seq,apatseq);
scname = taxonomy->taxons->taxon[seq->taxid].name;
strncpy(head,seq->SQ,10);
head[10]=0;
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
tail[10]=0;
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
o2cHits= 0;
if (o1Hits)
{
stktmp = apatseq->hitpos[0];
begin = stktmp->val[0] + o1->patlen;
if (lmax)
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
else
length= apatseq->seqlen - begin;
apatseq=ecoseq2apatseq(seq,apatseq);
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
if (o2cHits)
for (i=0; i < o1Hits;i++)
{
posi = apatseq->hitpos[0]->val[i];
erri = apatseq->hiterr[0]->val[i];
for (j=0; j < o2cHits; j++)
{
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
errj =apatseq->hiterr[1]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
}
}
}
o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
o1cHits= 0;
if (o2Hits)
{
stktmp = apatseq->hitpos[2];
begin = stktmp->val[0] + o2->patlen;
if (lmax)
length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
else
length= apatseq->seqlen - begin;
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
o2cHits= 0;
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
if (o1cHits)
for (i=0; i < o2Hits;i++)
if (o1Hits)
{
posi = apatseq->hitpos[2]->val[i];
erri = apatseq->hiterr[2]->val[i];
for (j=0; j < o1cHits; j++)
{
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
errj=apatseq->hiterr[3]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
stktmp = apatseq->hitpos[0];
begin = stktmp->val[0] + o1->patlen;
if (lmax)
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
else
length= apatseq->seqlen - begin;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
}
}
}
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
next:
if (o2cHits)
for (i=0; i < o1Hits;i++)
{
posi = apatseq->hitpos[0]->val[i];
erri = apatseq->hiterr[0]->val[i];
for (j=0; j < o2cHits; j++)
{
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
errj =apatseq->hiterr[1]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
}
}
}
o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
o1cHits= 0;
if (o2Hits)
{
stktmp = apatseq->hitpos[2];
begin = stktmp->val[0] + o2->patlen;
if (lmax)
length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
else
length= apatseq->seqlen - begin;
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
if (o1cHits)
for (i=0; i < o2Hits;i++)
{
posi = apatseq->hitpos[2]->val[i];
erri = apatseq->hiterr[2]->val[i];
for (j=0; j < o1cHits; j++)
{
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
errj=apatseq->hiterr[3]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
}
}
}
} /* End of taxonomic selection */
delete_ecoseq(seq);
@ -520,7 +527,7 @@ int main(int argc, char **argv)
}
ECOFREE(restricted_taxid, "Error: could not free restricted_taxid\n");
ECOFREE(excluded_taxid, "Error: could not free excluded_taxid\n");
ECOFREE(ignored_taxid, "Error: could not free excluded_taxid\n");
return 0;
}

View File

@ -2,7 +2,15 @@
#include <stdio.h>
#include <stdlib.h>
/*
* print the message given as argument and exit the program
* @param error error number
* @param message the text explaining what's going on
* @param filename the file source where the program failed
* @param linenumber the line where it has failed
* filename and linenumber are written at pre-processing
* time by a macro
*/
void ecoError(int32_t error,
const char* message,
const char * filename,

View File

@ -16,20 +16,19 @@ int32_t is_big_endian()
int32_t swap_int32_t(int32_t i)
{
return SWAPINT32(i);
}
/**
* Read part of the file
* @param *f the database
* @param recordSize the size to be read
*
* @return buffer
*/
void *read_ecorecord(FILE *f,int32_t *recordSize)
{
static void *buffer =NULL;
@ -79,10 +78,14 @@ void *read_ecorecord(FILE *f,int32_t *recordSize)
/**
* Open the database and check it's readable
* @param filename name of the database (.sdx, .rdx, .tbx)
* @param sequencecount buffer - pointer to variable storing the number of occurence
* @param abort_on_open_error boolean to define the behaviour in case of error
* while opening the database
* @return FILE type
**/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error)

View File

@ -230,7 +230,7 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
int eco_is_taxid_ignored(int *ignored_taxid, int tab_len, int taxid);
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int *included_taxid, int tab_len, int taxid);
int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
#endif /*ECOPCR_H_*/

View File

@ -1,8 +1,8 @@
#include "ecoPCR.h"
int eco_is_taxid_ignored( int *ignored_taxid,
int tab_len,
int taxid)
int eco_is_taxid_ignored( int32_t *ignored_taxid,
int32_t tab_len,
int32_t taxid)
{
int i;
for (i=0; i < tab_len; i++){
@ -14,7 +14,7 @@ int eco_is_taxid_ignored( int *ignored_taxid,
int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
int *restricted_taxid,
int32_t *restricted_taxid,
int32_t tab_len,
int32_t taxid)
{

View File

@ -79,7 +79,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
}
/**
* ?? used ??
**/
FILE *open_ecoseqdb(const char *filename,
int32_t *sequencecount)
{
@ -140,6 +142,13 @@ ecoseq_t *readnext_ecoseq(FILE *f)
return seq;
}
/**
* Open the sequences database (.sdx file)
* @param prefix name of the database (radical without extension)
* @param index integer
*
* @return file object
*/
FILE *open_seqfile(const char *prefix,int32_t index)
{
char filename_buffer[1024];