git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@44 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
CC=gcc
|
|
||||||
CFLAGS= -W -Wall -O2 -g
|
|
||||||
LDFLAGS= -lm
|
|
||||||
EXEC=ecofind
|
|
||||||
SRC= ecofind.c
|
|
||||||
OBJ= $(SRC:.c=.o)
|
|
||||||
LIBPATH= -LlibecoPCR/
|
|
||||||
LIB= -lecoPCR
|
|
||||||
|
|
||||||
MACHINE=MAC_OS_X
|
|
||||||
|
|
||||||
all: $(EXEC)
|
|
||||||
|
|
||||||
ecofind: $(OBJ)
|
|
||||||
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
|
|
||||||
|
|
||||||
ecofind.o: $(SRC)
|
|
||||||
$(CC) -D$(MACHINE) -o $@ -c $< $(CFLAGS)
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f *.o
|
|
181
src/ecopcr.c
181
src/ecopcr.c
@ -52,7 +52,7 @@ static void PrintHelp()
|
|||||||
PP "datafile radical without any extension. For example /database/gbmam\n");
|
PP "datafile radical without any extension. For example /database/gbmam\n");
|
||||||
PP "------------------------------------------\n");
|
PP "------------------------------------------\n");
|
||||||
PP "Table result description : \n");
|
PP "Table result description : \n");
|
||||||
PP "column 1 : \n");
|
PP "column 1 : accession number\n");
|
||||||
PP "column 2 : sequence length\n");
|
PP "column 2 : sequence length\n");
|
||||||
PP "column 3 : taxonomic id\n");
|
PP "column 3 : taxonomic id\n");
|
||||||
PP "column 4 : rank\n");
|
PP "column 4 : rank\n");
|
||||||
@ -70,7 +70,7 @@ static void PrintHelp()
|
|||||||
PP "column 16 : second oligonucleotide\n");
|
PP "column 16 : second oligonucleotide\n");
|
||||||
PP "column 17 : number of errors for the second strand\n");
|
PP "column 17 : number of errors for the second strand\n");
|
||||||
PP "column 18 : amplification length\n");
|
PP "column 18 : amplification length\n");
|
||||||
PP "column 19 : \n");
|
PP "column 19 : sequence description\n");
|
||||||
PP "------------------------------------------\n");
|
PP "------------------------------------------\n");
|
||||||
PP "\n");
|
PP "\n");
|
||||||
|
|
||||||
@ -87,7 +87,7 @@ static void PrintHelp()
|
|||||||
static void ExitUsage(stat)
|
static void ExitUsage(stat)
|
||||||
int stat;
|
int stat;
|
||||||
{
|
{
|
||||||
PP "usage: ecoPCR [-1 oligo1] [-2 oligo2] [-l value] [-L value] [-e value] [-k] datafile\n");
|
PP "usage: ecoPCR [-1 oligo1] [-2 oligo2] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-k] datafile\n");
|
||||||
PP "type \"ecoPCR -h\" for help\n");
|
PP "type \"ecoPCR -h\" for help\n");
|
||||||
|
|
||||||
if (stat)
|
if (stat)
|
||||||
@ -295,12 +295,12 @@ int main(int argc, char **argv)
|
|||||||
int32_t errj;
|
int32_t errj;
|
||||||
|
|
||||||
int32_t *restricted_taxid = NULL;
|
int32_t *restricted_taxid = NULL;
|
||||||
int32_t *excluded_taxid = NULL;
|
int32_t *ignored_taxid = NULL;
|
||||||
int32_t r=0;
|
int32_t r=0;
|
||||||
int32_t g=0;
|
int32_t g=0;
|
||||||
|
|
||||||
|
|
||||||
while ((carg = getopt(argc, argv, "h1:2:l:L:e:E:r:k")) != -1) {
|
while ((carg = getopt(argc, argv, "h1:2:l:L:e:i:r:k")) != -1) {
|
||||||
|
|
||||||
switch (carg) {
|
switch (carg) {
|
||||||
/* -------------------- */
|
/* -------------------- */
|
||||||
@ -359,11 +359,11 @@ int main(int argc, char **argv)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
/* --------------------------------- */
|
/* --------------------------------- */
|
||||||
case 'E': /* stores the taxonomic id to ignore */
|
case 'i': /* stores the taxonomic id to ignore */
|
||||||
/* --------------------------------- */
|
/* --------------------------------- */
|
||||||
excluded_taxid = ECOREALLOC(excluded_taxid,sizeof(int32_t)*(g+1),
|
ignored_taxid = ECOREALLOC(ignored_taxid,sizeof(int32_t)*(g+1),
|
||||||
"Error on excluded_taxid reallocation");
|
"Error on excluded_taxid reallocation");
|
||||||
sscanf(optarg,"%d",&excluded_taxid[g]);
|
sscanf(optarg,"%d",&ignored_taxid[g]);
|
||||||
g++;
|
g++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -428,91 +428,98 @@ int main(int argc, char **argv)
|
|||||||
/**
|
/**
|
||||||
* check if current sequence should be ignored
|
* check if current sequence should be ignored
|
||||||
**/
|
**/
|
||||||
if ( (g > 0) && (eco_is_taxid_ignored(excluded_taxid, g, taxonomy->taxons->taxon[seq->taxid].taxid)) )
|
if ( (r == 0) ||
|
||||||
goto next;
|
(eco_is_taxid_included(taxonomy,
|
||||||
|
restricted_taxid,
|
||||||
/**
|
r,
|
||||||
* check current sequence is included
|
taxonomy->taxons->taxon[seq->taxid].taxid)
|
||||||
**/
|
)
|
||||||
if ( (r > 0) && (!eco_is_taxid_included(taxonomy, restricted_taxid, r, taxonomy->taxons->taxon[seq->taxid].taxid)) )
|
)
|
||||||
goto next;
|
if ((g == 0) ||
|
||||||
|
!(eco_is_taxid_included(taxonomy,
|
||||||
|
ignored_taxid,
|
||||||
|
g,
|
||||||
|
taxonomy->taxons->taxon[seq->taxid].taxid)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
{
|
||||||
|
|
||||||
scname = taxonomy->taxons->taxon[seq->taxid].name;
|
scname = taxonomy->taxons->taxon[seq->taxid].name;
|
||||||
strncpy(head,seq->SQ,10);
|
strncpy(head,seq->SQ,10);
|
||||||
head[10]=0;
|
head[10]=0;
|
||||||
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
|
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
|
||||||
tail[10]=0;
|
tail[10]=0;
|
||||||
|
|
||||||
apatseq=ecoseq2apatseq(seq,apatseq);
|
|
||||||
|
|
||||||
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
|
apatseq=ecoseq2apatseq(seq,apatseq);
|
||||||
o2cHits= 0;
|
|
||||||
|
|
||||||
if (o1Hits)
|
|
||||||
{
|
|
||||||
stktmp = apatseq->hitpos[0];
|
|
||||||
begin = stktmp->val[0] + o1->patlen;
|
|
||||||
|
|
||||||
if (lmax)
|
|
||||||
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
|
|
||||||
else
|
|
||||||
length= apatseq->seqlen - begin;
|
|
||||||
|
|
||||||
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
|
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
|
||||||
|
o2cHits= 0;
|
||||||
if (o2cHits)
|
|
||||||
for (i=0; i < o1Hits;i++)
|
|
||||||
{
|
|
||||||
posi = apatseq->hitpos[0]->val[i];
|
|
||||||
erri = apatseq->hiterr[0]->val[i];
|
|
||||||
for (j=0; j < o2cHits; j++)
|
|
||||||
{
|
|
||||||
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
|
|
||||||
errj =apatseq->hiterr[1]->val[j];
|
|
||||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
|
||||||
|
|
||||||
if ((!lmin || (length >= lmin)) &&
|
|
||||||
(!lmax || (length <= lmax)))
|
|
||||||
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
|
||||||
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
|
|
||||||
o1cHits= 0;
|
|
||||||
if (o2Hits)
|
|
||||||
{
|
|
||||||
stktmp = apatseq->hitpos[2];
|
|
||||||
begin = stktmp->val[0] + o2->patlen;
|
|
||||||
|
|
||||||
if (lmax)
|
|
||||||
length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
|
|
||||||
else
|
|
||||||
length= apatseq->seqlen - begin;
|
|
||||||
|
|
||||||
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
|
if (o1Hits)
|
||||||
|
|
||||||
if (o1cHits)
|
|
||||||
for (i=0; i < o2Hits;i++)
|
|
||||||
{
|
{
|
||||||
posi = apatseq->hitpos[2]->val[i];
|
stktmp = apatseq->hitpos[0];
|
||||||
erri = apatseq->hiterr[2]->val[i];
|
begin = stktmp->val[0] + o1->patlen;
|
||||||
for (j=0; j < o1cHits; j++)
|
|
||||||
{
|
if (lmax)
|
||||||
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
|
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
|
||||||
errj=apatseq->hiterr[3]->val[j];
|
else
|
||||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
length= apatseq->seqlen - begin;
|
||||||
|
|
||||||
if ((!lmin || (length >= lmin)) &&
|
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
|
||||||
(!lmax || (length <= lmax)))
|
|
||||||
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
|
||||||
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
next:
|
if (o2cHits)
|
||||||
|
for (i=0; i < o1Hits;i++)
|
||||||
|
{
|
||||||
|
posi = apatseq->hitpos[0]->val[i];
|
||||||
|
erri = apatseq->hiterr[0]->val[i];
|
||||||
|
for (j=0; j < o2cHits; j++)
|
||||||
|
{
|
||||||
|
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
|
||||||
|
errj =apatseq->hiterr[1]->val[j];
|
||||||
|
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||||
|
|
||||||
|
if ((!lmin || (length >= lmin)) &&
|
||||||
|
(!lmax || (length <= lmax)))
|
||||||
|
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||||
|
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
|
||||||
|
o1cHits= 0;
|
||||||
|
if (o2Hits)
|
||||||
|
{
|
||||||
|
stktmp = apatseq->hitpos[2];
|
||||||
|
begin = stktmp->val[0] + o2->patlen;
|
||||||
|
|
||||||
|
if (lmax)
|
||||||
|
length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
|
||||||
|
else
|
||||||
|
length= apatseq->seqlen - begin;
|
||||||
|
|
||||||
|
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
|
||||||
|
|
||||||
|
if (o1cHits)
|
||||||
|
for (i=0; i < o2Hits;i++)
|
||||||
|
{
|
||||||
|
posi = apatseq->hitpos[2]->val[i];
|
||||||
|
erri = apatseq->hiterr[2]->val[i];
|
||||||
|
for (j=0; j < o1cHits; j++)
|
||||||
|
{
|
||||||
|
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
|
||||||
|
errj=apatseq->hiterr[3]->val[j];
|
||||||
|
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||||
|
|
||||||
|
if ((!lmin || (length >= lmin)) &&
|
||||||
|
(!lmax || (length <= lmax)))
|
||||||
|
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||||
|
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} /* End of taxonomic selection */
|
||||||
|
|
||||||
delete_ecoseq(seq);
|
delete_ecoseq(seq);
|
||||||
|
|
||||||
@ -520,7 +527,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
ECOFREE(restricted_taxid, "Error: could not free restricted_taxid\n");
|
ECOFREE(restricted_taxid, "Error: could not free restricted_taxid\n");
|
||||||
ECOFREE(excluded_taxid, "Error: could not free excluded_taxid\n");
|
ECOFREE(ignored_taxid, "Error: could not free excluded_taxid\n");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,15 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* print the message given as argument and exit the program
|
||||||
|
* @param error error number
|
||||||
|
* @param message the text explaining what's going on
|
||||||
|
* @param filename the source file where the program failed
|
||||||
|
* @param linenumber the line where it has failed
|
||||||
|
* filename and linenumber are written at pre-processing
|
||||||
|
* time by a macro
|
||||||
|
*/
|
||||||
void ecoError(int32_t error,
|
void ecoError(int32_t error,
|
||||||
const char* message,
|
const char* message,
|
||||||
const char * filename,
|
const char * filename,
|
||||||
|
@ -16,20 +16,19 @@ int32_t is_big_endian()
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int32_t swap_int32_t(int32_t i)
|
int32_t swap_int32_t(int32_t i)
|
||||||
{
|
{
|
||||||
return SWAPINT32(i);
|
return SWAPINT32(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read part of the file
|
||||||
|
* @param *f the database
|
||||||
|
* @param recordSize the size to be read
|
||||||
|
*
|
||||||
|
* @return buffer
|
||||||
|
*/
|
||||||
void *read_ecorecord(FILE *f,int32_t *recordSize)
|
void *read_ecorecord(FILE *f,int32_t *recordSize)
|
||||||
{
|
{
|
||||||
static void *buffer =NULL;
|
static void *buffer =NULL;
|
||||||
@ -79,10 +78,14 @@ void *read_ecorecord(FILE *f,int32_t *recordSize)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open the database and check it's readable
|
||||||
|
* @param filename name of the database (.sdx, .rdx, .tbx)
|
||||||
|
* @param sequencecount buffer - pointer to the variable storing the number of occurrences
|
||||||
|
* @param abort_on_open_error boolean to define the behaviour in case of error
|
||||||
|
* while opening the database
|
||||||
|
* @return FILE pointer
|
||||||
|
**/
|
||||||
FILE *open_ecorecorddb(const char *filename,
|
FILE *open_ecorecorddb(const char *filename,
|
||||||
int32_t *sequencecount,
|
int32_t *sequencecount,
|
||||||
int32_t abort_on_open_error)
|
int32_t abort_on_open_error)
|
||||||
|
@ -230,7 +230,7 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
|||||||
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||||
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||||
|
|
||||||
int eco_is_taxid_ignored(int *ignored_taxid, int tab_len, int taxid);
|
int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
|
||||||
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int *included_taxid, int tab_len, int taxid);
|
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
|
||||||
|
|
||||||
#endif /*ECOPCR_H_*/
|
#endif /*ECOPCR_H_*/
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
#include "ecoPCR.h"
|
#include "ecoPCR.h"
|
||||||
|
|
||||||
int eco_is_taxid_ignored( int *ignored_taxid,
|
int eco_is_taxid_ignored( int32_t *ignored_taxid,
|
||||||
int tab_len,
|
int32_t tab_len,
|
||||||
int taxid)
|
int32_t taxid)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for (i=0; i < tab_len; i++){
|
for (i=0; i < tab_len; i++){
|
||||||
@ -14,7 +14,7 @@ int eco_is_taxid_ignored( int *ignored_taxid,
|
|||||||
|
|
||||||
|
|
||||||
int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
|
int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
|
||||||
int *restricted_taxid,
|
int32_t *restricted_taxid,
|
||||||
int32_t tab_len,
|
int32_t tab_len,
|
||||||
int32_t taxid)
|
int32_t taxid)
|
||||||
{
|
{
|
||||||
|
@ -79,7 +79,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TODO: is this function still used?
|
||||||
|
**/
|
||||||
FILE *open_ecoseqdb(const char *filename,
|
FILE *open_ecoseqdb(const char *filename,
|
||||||
int32_t *sequencecount)
|
int32_t *sequencecount)
|
||||||
{
|
{
|
||||||
@ -140,6 +142,13 @@ ecoseq_t *readnext_ecoseq(FILE *f)
|
|||||||
return seq;
|
return seq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open the sequences database (.sdx file)
|
||||||
|
* @param prefix name of the database (radical without extension)
|
||||||
|
* @param index integer
|
||||||
|
*
|
||||||
|
* @return file object
|
||||||
|
*/
|
||||||
FILE *open_seqfile(const char *prefix,int32_t index)
|
FILE *open_seqfile(const char *prefix,int32_t index)
|
||||||
{
|
{
|
||||||
char filename_buffer[1024];
|
char filename_buffer[1024];
|
||||||
|
Reference in New Issue
Block a user