git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@44 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -1,22 +0,0 @@
|
||||
|
||||
CC=gcc
|
||||
CFLAGS= -W -Wall -O2 -g
|
||||
LDFLAGS= -lm
|
||||
EXEC=ecofind
|
||||
SRC= ecofind.c
|
||||
OBJ= $(SRC:.c=.o)
|
||||
LIBPATH= -LlibecoPCR/
|
||||
LIB= -lecoPCR
|
||||
|
||||
MACHINE=MAC_OS_X
|
||||
|
||||
all: $(EXEC)
|
||||
|
||||
ecofind: $(OBJ)
|
||||
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
|
||||
|
||||
ecofind.o: $(SRC)
|
||||
$(CC) -D$(MACHINE) -o $@ -c $< $(CFLAGS)
|
||||
|
||||
clean:
|
||||
rm -f *.o
|
181
src/ecopcr.c
181
src/ecopcr.c
@ -52,7 +52,7 @@ static void PrintHelp()
|
||||
PP "datafile radical without any extension. For example /database/gbmam\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "Table result description : \n");
|
||||
PP "column 1 : \n");
|
||||
PP "column 1 : accession number\n");
|
||||
PP "column 2 : sequence length\n");
|
||||
PP "column 3 : taxonomic id\n");
|
||||
PP "column 4 : rank\n");
|
||||
@ -70,7 +70,7 @@ static void PrintHelp()
|
||||
PP "column 16 : second oligonucleotide\n");
|
||||
PP "column 17 : number of errors for the second strand\n");
|
||||
PP "column 18 : amplification length\n");
|
||||
PP "column 19 : \n");
|
||||
PP "column 19 : sequence description\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "\n");
|
||||
|
||||
@ -87,7 +87,7 @@ static void PrintHelp()
|
||||
static void ExitUsage(stat)
|
||||
int stat;
|
||||
{
|
||||
PP "usage: ecoPCR [-1 oligo1] [-2 oligo2] [-l value] [-L value] [-e value] [-k] datafile\n");
|
||||
PP "usage: ecoPCR [-1 oligo1] [-2 oligo2] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-k] datafile\n");
|
||||
PP "type \"ecoPCR -h\" for help\n");
|
||||
|
||||
if (stat)
|
||||
@ -295,12 +295,12 @@ int main(int argc, char **argv)
|
||||
int32_t errj;
|
||||
|
||||
int32_t *restricted_taxid = NULL;
|
||||
int32_t *excluded_taxid = NULL;
|
||||
int32_t *ignored_taxid = NULL;
|
||||
int32_t r=0;
|
||||
int32_t g=0;
|
||||
|
||||
|
||||
while ((carg = getopt(argc, argv, "h1:2:l:L:e:E:r:k")) != -1) {
|
||||
while ((carg = getopt(argc, argv, "h1:2:l:L:e:i:r:k")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* -------------------- */
|
||||
@ -359,11 +359,11 @@ int main(int argc, char **argv)
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'E': /* stores the taxonomic id to ignore */
|
||||
case 'i': /* stores the taxonomic id to ignore */
|
||||
/* --------------------------------- */
|
||||
excluded_taxid = ECOREALLOC(excluded_taxid,sizeof(int32_t)*(g+1),
|
||||
ignored_taxid = ECOREALLOC(ignored_taxid,sizeof(int32_t)*(g+1),
|
||||
"Error on excluded_taxid reallocation");
|
||||
sscanf(optarg,"%d",&excluded_taxid[g]);
|
||||
sscanf(optarg,"%d",&ignored_taxid[g]);
|
||||
g++;
|
||||
break;
|
||||
|
||||
@ -428,91 +428,98 @@ int main(int argc, char **argv)
|
||||
/**
|
||||
* check if current sequence should be ignored
|
||||
**/
|
||||
if ( (g > 0) && (eco_is_taxid_ignored(excluded_taxid, g, taxonomy->taxons->taxon[seq->taxid].taxid)) )
|
||||
goto next;
|
||||
|
||||
/**
|
||||
* check current sequence is included
|
||||
**/
|
||||
if ( (r > 0) && (!eco_is_taxid_included(taxonomy, restricted_taxid, r, taxonomy->taxons->taxon[seq->taxid].taxid)) )
|
||||
goto next;
|
||||
if ( (r == 0) ||
|
||||
(eco_is_taxid_included(taxonomy,
|
||||
restricted_taxid,
|
||||
r,
|
||||
taxonomy->taxons->taxon[seq->taxid].taxid)
|
||||
)
|
||||
)
|
||||
if ((g == 0) ||
|
||||
!(eco_is_taxid_included(taxonomy,
|
||||
ignored_taxid,
|
||||
g,
|
||||
taxonomy->taxons->taxon[seq->taxid].taxid)
|
||||
)
|
||||
)
|
||||
{
|
||||
|
||||
scname = taxonomy->taxons->taxon[seq->taxid].name;
|
||||
strncpy(head,seq->SQ,10);
|
||||
head[10]=0;
|
||||
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
|
||||
tail[10]=0;
|
||||
|
||||
apatseq=ecoseq2apatseq(seq,apatseq);
|
||||
scname = taxonomy->taxons->taxon[seq->taxid].name;
|
||||
strncpy(head,seq->SQ,10);
|
||||
head[10]=0;
|
||||
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
|
||||
tail[10]=0;
|
||||
|
||||
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
|
||||
o2cHits= 0;
|
||||
|
||||
if (o1Hits)
|
||||
{
|
||||
stktmp = apatseq->hitpos[0];
|
||||
begin = stktmp->val[0] + o1->patlen;
|
||||
|
||||
if (lmax)
|
||||
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
|
||||
else
|
||||
length= apatseq->seqlen - begin;
|
||||
apatseq=ecoseq2apatseq(seq,apatseq);
|
||||
|
||||
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
|
||||
|
||||
if (o2cHits)
|
||||
for (i=0; i < o1Hits;i++)
|
||||
{
|
||||
posi = apatseq->hitpos[0]->val[i];
|
||||
erri = apatseq->hiterr[0]->val[i];
|
||||
for (j=0; j < o2cHits; j++)
|
||||
{
|
||||
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
|
||||
errj =apatseq->hiterr[1]->val[j];
|
||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||
|
||||
if ((!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
|
||||
o1cHits= 0;
|
||||
if (o2Hits)
|
||||
{
|
||||
stktmp = apatseq->hitpos[2];
|
||||
begin = stktmp->val[0] + o2->patlen;
|
||||
|
||||
if (lmax)
|
||||
length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
|
||||
else
|
||||
length= apatseq->seqlen - begin;
|
||||
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
|
||||
o2cHits= 0;
|
||||
|
||||
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
|
||||
|
||||
if (o1cHits)
|
||||
for (i=0; i < o2Hits;i++)
|
||||
if (o1Hits)
|
||||
{
|
||||
posi = apatseq->hitpos[2]->val[i];
|
||||
erri = apatseq->hiterr[2]->val[i];
|
||||
for (j=0; j < o1cHits; j++)
|
||||
{
|
||||
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
|
||||
errj=apatseq->hiterr[3]->val[j];
|
||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||
stktmp = apatseq->hitpos[0];
|
||||
begin = stktmp->val[0] + o1->patlen;
|
||||
|
||||
if (lmax)
|
||||
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
|
||||
else
|
||||
length= apatseq->seqlen - begin;
|
||||
|
||||
if ((!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
||||
}
|
||||
}
|
||||
}
|
||||
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
|
||||
|
||||
next:
|
||||
if (o2cHits)
|
||||
for (i=0; i < o1Hits;i++)
|
||||
{
|
||||
posi = apatseq->hitpos[0]->val[i];
|
||||
erri = apatseq->hiterr[0]->val[i];
|
||||
for (j=0; j < o2cHits; j++)
|
||||
{
|
||||
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
|
||||
errj =apatseq->hiterr[1]->val[j];
|
||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||
|
||||
if ((!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
|
||||
o1cHits= 0;
|
||||
if (o2Hits)
|
||||
{
|
||||
stktmp = apatseq->hitpos[2];
|
||||
begin = stktmp->val[0] + o2->patlen;
|
||||
|
||||
if (lmax)
|
||||
length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
|
||||
else
|
||||
length= apatseq->seqlen - begin;
|
||||
|
||||
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
|
||||
|
||||
if (o1cHits)
|
||||
for (i=0; i < o2Hits;i++)
|
||||
{
|
||||
posi = apatseq->hitpos[2]->val[i];
|
||||
erri = apatseq->hiterr[2]->val[i];
|
||||
for (j=0; j < o1cHits; j++)
|
||||
{
|
||||
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
|
||||
errj=apatseq->hiterr[3]->val[j];
|
||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||
|
||||
if ((!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} /* End of taxonomic selection */
|
||||
|
||||
delete_ecoseq(seq);
|
||||
|
||||
@ -520,7 +527,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
ECOFREE(restricted_taxid, "Error: could not free restricted_taxid\n");
|
||||
ECOFREE(excluded_taxid, "Error: could not free excluded_taxid\n");
|
||||
ECOFREE(ignored_taxid, "Error: could not free excluded_taxid\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2,7 +2,15 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
/*
|
||||
* print the message given as argument and exit the program
|
||||
* @param error error number
|
||||
* @param message the text explaining what's going on
|
||||
* @param filename the file source where the program failed
|
||||
* @param linenumber the line where it has failed
|
||||
* filename and linenumber are written at pre-processing
|
||||
* time by a macro
|
||||
*/
|
||||
void ecoError(int32_t error,
|
||||
const char* message,
|
||||
const char * filename,
|
||||
|
@ -16,20 +16,19 @@ int32_t is_big_endian()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int32_t swap_int32_t(int32_t i)
|
||||
{
|
||||
return SWAPINT32(i);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Read part of the file
|
||||
* @param *f the database
|
||||
* @param recordSize the size to be read
|
||||
*
|
||||
* @return buffer
|
||||
*/
|
||||
void *read_ecorecord(FILE *f,int32_t *recordSize)
|
||||
{
|
||||
static void *buffer =NULL;
|
||||
@ -79,10 +78,14 @@ void *read_ecorecord(FILE *f,int32_t *recordSize)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Open the database and check it's readable
|
||||
* @param filename name of the database (.sdx, .rdx, .tbx)
|
||||
* @param sequencecount buffer - pointer to variable storing the number of occurence
|
||||
* @param abort_on_open_error boolean to define the behaviour in case of error
|
||||
* while opening the database
|
||||
* @return FILE type
|
||||
**/
|
||||
FILE *open_ecorecorddb(const char *filename,
|
||||
int32_t *sequencecount,
|
||||
int32_t abort_on_open_error)
|
||||
|
@ -230,7 +230,7 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
|
||||
int eco_is_taxid_ignored(int *ignored_taxid, int tab_len, int taxid);
|
||||
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int *included_taxid, int tab_len, int taxid);
|
||||
int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
|
||||
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
|
||||
|
||||
#endif /*ECOPCR_H_*/
|
||||
|
@ -1,8 +1,8 @@
|
||||
#include "ecoPCR.h"
|
||||
|
||||
int eco_is_taxid_ignored( int *ignored_taxid,
|
||||
int tab_len,
|
||||
int taxid)
|
||||
int eco_is_taxid_ignored( int32_t *ignored_taxid,
|
||||
int32_t tab_len,
|
||||
int32_t taxid)
|
||||
{
|
||||
int i;
|
||||
for (i=0; i < tab_len; i++){
|
||||
@ -14,7 +14,7 @@ int eco_is_taxid_ignored( int *ignored_taxid,
|
||||
|
||||
|
||||
int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
|
||||
int *restricted_taxid,
|
||||
int32_t *restricted_taxid,
|
||||
int32_t tab_len,
|
||||
int32_t taxid)
|
||||
{
|
||||
|
@ -79,7 +79,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* ?? used ??
|
||||
**/
|
||||
FILE *open_ecoseqdb(const char *filename,
|
||||
int32_t *sequencecount)
|
||||
{
|
||||
@ -140,6 +142,13 @@ ecoseq_t *readnext_ecoseq(FILE *f)
|
||||
return seq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the sequences database (.sdx file)
|
||||
* @param prefix name of the database (radical without extension)
|
||||
* @param index integer
|
||||
*
|
||||
* @return file object
|
||||
*/
|
||||
FILE *open_seqfile(const char *prefix,int32_t index)
|
||||
{
|
||||
char filename_buffer[1024];
|
||||
|
Reference in New Issue
Block a user