11 Commits

9 changed files with 129 additions and 209 deletions

5
.gitignore vendored
View File

@ -1,8 +1,12 @@
/.gitignore
/.cproject
/.project
# /src/
/src/ecoPCR
/src/ecofind
/src/*.P
/src/*.o
/src/ecogrep
# /src/libapat/
@ -14,3 +18,4 @@
# /src/libthermo/
/src/libthermo/*.P

View File

@ -1 +1 @@
0.6.0
1.0.0

Binary file not shown.

View File

@ -6,7 +6,7 @@
#include <getopt.h>
#define VERSION "0.6.0"
#define VERSION "1.0.1"
/* ----------------------------------------------- */
@ -26,7 +26,7 @@ static void PrintHelp()
PP "-a : Salt concentration in M for Tm computation (default 0.05 M)\n\n");
PP "-c : Consider that the database sequences are [c]ircular\n\n");
PP "-d : [D]atabase : to match the expected format, the database\n");
PP " has to be formated first by the ecoPCRFormat.py program located.\n");
PP " has to be formatted first by the ecoPCRFormat.py program located.\n");
PP " in the tools directory.\n");
PP " ecoPCRFormat.py creates three file types :\n");
PP " .sdx : contains the sequences\n");
@ -80,7 +80,7 @@ static void PrintHelp()
PP "column 21 : sequence\n");
PP "column 22 : definition\n");
PP "------------------------------------------\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
PP " https://git.metabarcoding.org/obitools/ecopcr/wikis/home\n");
PP "------------------------------------------\n\n");
PP "\n");
@ -566,11 +566,11 @@ int main(int argc, char **argv)
)
{
scname = taxonomy->taxons->taxon[seq->taxid].name;
strncpy(head,seq->SQ,10);
head[10]=0;
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
tail[10]=0;
//scname = taxonomy->taxons->taxon[seq->taxid].name;
//strncpy(head,seq->SQ,10);
//head[10]=0;
//strncpy(tail,seq->SQ+seq->SQ_length-10,10);
//tail[10]=0;
apatseq=ecoseq2apatseq(seq,apatseq,circular);
@ -610,18 +610,21 @@ int main(int argc, char **argv)
{
posj+=o2c->patlen;
// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
errj =apatseq->hiterr[1]->val[j];
errj = apatseq->hiterr[1]->val[j];
length = 0;
if (posj > posi)
length=posj - posi - o1->patlen - o2->patlen;
if (posj < posi)
length= posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
if (length &&
length = posj - posi - o1->patlen - o2->patlen;
else {
if (circular > 0)
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
}
if ((length>0) && // For when primers touch or overlap
(!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
(!lmax || (length <= lmax)))
{
printRepeat(seq,oligo1,oligo2,&tparm,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
}
}
}
}
@ -647,7 +650,7 @@ int main(int argc, char **argv)
}
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
// printf("circular= %d\n",circular);
if (o1cHits)
for (i=0; i < o2Hits;i++)
{
@ -666,15 +669,19 @@ int main(int argc, char **argv)
length = 0;
if (posj > posi)
length=posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : suppress by <EC> */
if (posj < posi)
length= posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
length = posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
else {
if (circular > 0)
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
}
if (length &&
if ((length>0) && // For when primers touch or overlap
(!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
(!lmax || (length <= lmax)))
{
printRepeat(seq,oligo1,oligo2,&tparm,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
}
}
}
}

View File

@ -1,14 +1,13 @@
MACHINE=MAC_OS_X
LIBPATH= -Llibapat -LlibecoPCR -Llibthermo
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
MAKEDEPEND = gcc -M $(CPPFLAGS) -o $*.d $<
CC=gcc
CFLAGS= -W -Wall -O2 -g
CFLAGS= -O3 -w
default: all
%.o: %.c
$(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $<
$(CC) $(CFLAGS) -c -o $@ $<
%.P : %.c
$(MAKEDEPEND)

View File

@ -27,13 +27,9 @@
#define PROTO 1 /* prototypes flag */
#endif
#ifdef MAC_OS_C
#define Vrai true /* TC boolean values */
#define Faux false /* */
#else
#define Vrai 0x1 /* bool values = TRUE */
#define Faux 0x0 /* = FALSE */
#endif
#define Nil NULL /* nil pointer */
@ -42,28 +38,7 @@
#define kBigUInt16 0xffff /* plus grand 16 bits ~signe */
#define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */
#ifdef MAC_OS_C
/* ==================================================== */
/* Types (for Macintosh ThinK C || MWerks) */
/* ==================================================== */
/* --- specific sizes --------- */
typedef long Int32; /* Int32 = 32 bits signe */
typedef unsigned long UInt32; /* UInt32 = 32 bits ~signe */
typedef short Int16; /* Int16 = 16 bits signe */
typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */
typedef char Int8; /* Int8 = 8 bits signe */
typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */
/* --- default types ---------- */
typedef Boolean Bool; /* booleen */
typedef long Int; /* 'natural' int (>= 32 bits) */
typedef void *Ptr; /* pointeur */
#elif ((defined SUN) || (defined SGI) || (defined UNIX))
/* ==================================================== */
/* Types (for Sun & Iris - 32 bits machines) */
/* ==================================================== */
@ -84,14 +59,7 @@ typedef int Int; /* 'natural' int (>= 32 bits) */
typedef void *Ptr; /* pointeur */
#else
/* ==================================================== */
/* Types (for undefined machines) */
/* ==================================================== */
#error undefined MACHINE <please edit Gmach.h>
#endif
/* ==================================================== */
/* special macro for prototypes */

View File

@ -12,14 +12,14 @@ ecorankidx_t *read_rankidx(const char *filename)
int32_t i;
int32_t rs;
char *buffer;
f = open_ecorecorddb(filename,&count,1);
index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
"Allocate rank index");
index->count=count;
index->count=count;
for (i=0; i < count; i++)
{
buffer = read_ecorecord(f,&rs);
@ -27,21 +27,18 @@ ecorankidx_t *read_rankidx(const char *filename)
"Allocate rank label");
strncpy(index->label[i],buffer,rs);
}
return index;
}
int32_t rank_index(const char* label,ecorankidx_t* ranks)
{
char **rep;
fprintf(stderr,"Looking for rank -%s-... ",label);
rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
if (rep)
return rep-ranks->label;
else
ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
return -1;
}

View File

@ -5,10 +5,10 @@
static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
/**
* Open the taxonomy database
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
/**
* Open the taxonomy database
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2)
{
@ -18,22 +18,22 @@ ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2)
FILE *f2;
ecotxidx_t *index;
int32_t i;
f = open_ecorecorddb(filename,&count,1);
f2 = open_ecorecorddb(filename2,&count2,0);
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
"Allocate taxonomy");
index->count=count+count2;
fprintf(stderr,"Reading %d taxa...\n",count);
for (i=0; i < count; i++){
readnext_ecotaxon(f,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent;
}
}
if (count2>0)
fprintf(stderr,"Reading %d local taxa...\n",count2);
@ -52,18 +52,18 @@ ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2)
int32_t delete_taxonomy(ecotxidx_t *index)
{
int32_t i;
if (index)
{
for (i=0; i< index->count; i++)
if (index->taxon[i].name)
ECOFREE(index->taxon[i].name,"Free scientific name");
ECOFREE(index,"Free Taxonomy");
return 0;
}
return 1;
}
@ -75,32 +75,32 @@ int32_t delete_taxon(ecotx_t *taxon)
{
if (taxon->name)
ECOFREE(taxon->name,"Free scientific name");
ECOFREE(taxon,"Free Taxon");
return 0;
}
return 1;
}
/**
* Read the database for a given taxon a save the data
* Read the database for a given taxon a save the data
* into the taxon structure(if any found)
* @param *f pointer to FILE type returned by fopen
* @param *taxon pointer to the structure
*
* @return a ecotx_t structure if any taxon found else NULL
*
* @return a ecotx_t structure if any taxon found else NULL
*/
ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
{
ecotxformat_t *raw;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
@ -109,18 +109,18 @@ ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
raw->namelength = swap_int32_t(raw->namelength);
raw->parent = swap_int32_t(raw->parent);
raw->rank = swap_int32_t(raw->rank);
raw->taxid = swap_int32_t(raw->taxid);
raw->taxid = swap_int32_t(raw->taxid);
}
taxon->parent = (ecotx_t*)raw->parent;
taxon->taxid = raw->taxid;
taxon->rank = raw->rank;
taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char),
"Allocate taxon scientific name");
strncpy(taxon->name,raw->name,raw->namelength);
return taxon;
}
@ -131,26 +131,26 @@ ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
char *filename;
char *filename2;
int buffsize;
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
"Allocate taxonomy structure");
buffsize = strlen(prefix)+10;
filename = ECOMALLOC(buffsize,
"Allocate filename");
filename2= ECOMALLOC(buffsize,
"Allocate filename");
snprintf(filename,buffsize,"%s.rdx",prefix);
tax->ranks = read_rankidx(filename);
snprintf(filename,buffsize,"%s.tdx",prefix);
snprintf(filename2,buffsize,"%s.ldx",prefix);
tax->taxons = read_taxonomyidx(filename,filename2);
if (readAlternativeName)
{
snprintf(filename,buffsize,"%s.ndx",prefix);
@ -159,7 +159,7 @@ ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
else
tax->names=NULL;
return tax;
}
@ -170,15 +170,15 @@ int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
{
if (taxonomy->ranks)
ECOFREE(taxonomy->ranks,"Free rank index");
if (taxonomy->taxons)
ECOFREE(taxonomy->taxons,"Free taxon index");
ECOFREE(taxonomy,"Free taxonomy structure");
return 0;
}
return 1;
}
@ -187,17 +187,17 @@ ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
{
ecotx_t *current_taxon;
ecotx_t *next_taxon;
current_taxon = taxon;
next_taxon = current_taxon->parent;
while ((current_taxon!=next_taxon) && // I' am the root node
(current_taxon->rank!=rankidx))
{
current_taxon = next_taxon;
next_taxon = current_taxon->parent;
}
if (current_taxon->rank==rankidx)
return current_taxon;
else
@ -207,19 +207,19 @@ ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
/**
* Get back information concerning a taxon from a taxonomic id
* @param *taxonomy the taxonomy database
* @param taxid the taxonomic id
*
* @result a ecotx_t structure containing the taxonimic information
* @param taxid the taxonomic id
*
* @result a ecotx_t structure containing the taxonimic information
**/
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
int32_t taxid)
{
ecotx_t *current_taxon;
int32_t taxoncount;
int32_t i;
taxoncount=taxonomy->taxons->count;
for (current_taxon=taxonomy->taxons->taxon,
i=0;
i < taxoncount;
@ -229,30 +229,30 @@ ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
return current_taxon;
}
}
return (ecotx_t*)NULL;
return (ecotx_t*)NULL;
}
/**
* Find out if taxon is son of other taxon (identified by its taxid)
* @param *taxon son taxon
* @param parent_taxid taxonomic id of the other taxon
*
*
* @return 1 is the other taxid math a parent taxid, else 0
**/
int eco_isundertaxon(ecotx_t *taxon,
int eco_isundertaxon(ecotx_t *taxon,
int other_taxid)
{
ecotx_t *next_parent;
next_parent = taxon->parent;
while ( (other_taxid != next_parent->taxid) &&
next_parent = taxon->parent;
while ( (other_taxid != next_parent->taxid) &&
(strcmp(next_parent->name, "root")) )
{
next_parent = next_parent->parent;
}
if (other_taxid == next_parent->taxid)
return 1;
else
@ -264,16 +264,16 @@ ecotx_t *eco_getspecies(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("species",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
@ -282,16 +282,16 @@ ecotx_t *eco_getgenus(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("genus",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
@ -301,16 +301,16 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("family",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
@ -319,16 +319,16 @@ ecotx_t *eco_getkingdom(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("kingdom",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
@ -337,15 +337,18 @@ ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("superkingdom",taxonomy->ranks);
if (rankindex < 0) {
rankindex = rank_index("domain",taxonomy->ranks);
}
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python2.7
import re
import gzip
@ -7,11 +7,8 @@ import sys
import time
import getopt
try:
import psycopg2
_dbenable=True
except ImportError:
_dbenable=False
_dbenable=False
#####
#
@ -221,56 +218,7 @@ def readTaxonomyDump(taxdir):
return taxonomy,ranks,alternativeName,index
def readTaxonomyDB(dbname):
connection = psycopg2.connect(database=dbname)
cursor = connection.cursor()
cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
taxonomy=[list(x) for x in cursor]
cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
ranks=cursor.fetchall()
ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks))))
print >>sys.stderr,"Sorting taxons..."
taxonomy.sort(taxonCmp)
print >>sys.stderr,"Indexing taxonomy..."
index = {}
for t in taxonomy:
index[t[0]]=bsearchTaxon(taxonomy, t[0])
print >>sys.stderr,"Indexing parent and rank..."
for t in taxonomy:
t[1]=ranks[t[1]]
try:
t[2]=index[t[2]]
except KeyError,e:
if t[2] is None and t[0]==1:
t[2]=index[t[0]]
else:
raise e
cursor.execute("select taxid,name,category from ncbi_taxonomy.name")
alternativeName=[]
for taxid,name,classname in cursor:
alternativeName.append((name,classname,index[taxid]))
if classname == 'scientific name':
taxonomy[index[taxid]].append(name)
cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")
print >>sys.stderr,"Adding taxid alias..."
for taxid,current in cursor:
if current is not None:
index[taxid]=index[current]
else:
index[taxid]=None
return taxonomy,ranks,alternativeName,index
#####
#
#
@ -585,10 +533,9 @@ def ecoParseOptions(arguments):
}
o,filenames = getopt.getopt(arguments,
'ht:T:n:gfe',
'ht:n:gfe',
['help',
'taxonomy=',
'taxonomy_db=',
'name=',
'genbank',
'fasta',
@ -601,9 +548,6 @@ def ecoParseOptions(arguments):
elif name in ('-t','--taxonomy'):
opt['taxmod']='dump'
opt['taxdir']=value
elif name in ('-T','--taxonomy_db'):
opt['taxmod']='db'
opt['taxdb']=value
elif name in ('-n','--name'):
opt['prefix']=value
elif name in ('-g','--genbank'):
@ -622,6 +566,7 @@ def ecoParseOptions(arguments):
return opt,filenames
def printHelp():
print "-----------------------------------"
print " ecoPCRFormat.py"
@ -641,11 +586,7 @@ if __name__ == '__main__':
opt,filenames = ecoParseOptions(sys.argv[1:])
if opt['taxmod']=='dump':
taxonomy = readTaxonomyDump(opt['taxdir'])
elif opt['taxmod']=='db':
taxonomy = readTaxonomyDB(opt['taxdb'])
taxonomy = readTaxonomyDump(opt['taxdir'])
ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])