Compare commits
7 Commits
ecopcr_v0.
...
ecopcr_v1.
Author | SHA1 | Date | |
---|---|---|---|
c4321036be | |||
a92a7fa070 | |||
bd1db764d4 | |||
f0cca648ea | |||
573bd5bad7 | |||
17387dae8d | |||
18583c4d2e |
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,8 +1,12 @@
|
|||||||
|
/.gitignore
|
||||||
|
/.cproject
|
||||||
|
/.project
|
||||||
|
|
||||||
# /src/
|
# /src/
|
||||||
/src/ecoPCR
|
/src/ecoPCR
|
||||||
/src/ecofind
|
/src/ecofind
|
||||||
/src/*.P
|
/src/*.P
|
||||||
|
/src/*.o
|
||||||
/src/ecogrep
|
/src/ecogrep
|
||||||
|
|
||||||
# /src/libapat/
|
# /src/libapat/
|
||||||
@ -14,3 +18,4 @@
|
|||||||
|
|
||||||
# /src/libthermo/
|
# /src/libthermo/
|
||||||
/src/libthermo/*.P
|
/src/libthermo/*.P
|
||||||
|
|
||||||
|
Binary file not shown.
42
src/ecopcr.c
42
src/ecopcr.c
@ -6,7 +6,7 @@
|
|||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
|
|
||||||
|
|
||||||
#define VERSION "0.6.0"
|
#define VERSION "1.0.1"
|
||||||
|
|
||||||
|
|
||||||
/* ----------------------------------------------- */
|
/* ----------------------------------------------- */
|
||||||
@ -26,7 +26,7 @@ static void PrintHelp()
|
|||||||
PP "-a : Salt concentration in M for Tm computation (default 0.05 M)\n\n");
|
PP "-a : Salt concentration in M for Tm computation (default 0.05 M)\n\n");
|
||||||
PP "-c : Consider that the database sequences are [c]ircular\n\n");
|
PP "-c : Consider that the database sequences are [c]ircular\n\n");
|
||||||
PP "-d : [D]atabase : to match the expected format, the database\n");
|
PP "-d : [D]atabase : to match the expected format, the database\n");
|
||||||
PP " has to be formated first by the ecoPCRFormat.py program located.\n");
|
PP " has to be formatted first by the ecoPCRFormat.py program located.\n");
|
||||||
PP " in the tools directory.\n");
|
PP " in the tools directory.\n");
|
||||||
PP " ecoPCRFormat.py creates three file types :\n");
|
PP " ecoPCRFormat.py creates three file types :\n");
|
||||||
PP " .sdx : contains the sequences\n");
|
PP " .sdx : contains the sequences\n");
|
||||||
@ -80,7 +80,7 @@ static void PrintHelp()
|
|||||||
PP "column 21 : sequence\n");
|
PP "column 21 : sequence\n");
|
||||||
PP "column 22 : definition\n");
|
PP "column 22 : definition\n");
|
||||||
PP "------------------------------------------\n");
|
PP "------------------------------------------\n");
|
||||||
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
|
PP " https://git.metabarcoding.org/obitools/ecopcr/wikis/home\n");
|
||||||
PP "------------------------------------------\n\n");
|
PP "------------------------------------------\n\n");
|
||||||
PP "\n");
|
PP "\n");
|
||||||
|
|
||||||
@ -566,11 +566,11 @@ int main(int argc, char **argv)
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
|
||||||
scname = taxonomy->taxons->taxon[seq->taxid].name;
|
//scname = taxonomy->taxons->taxon[seq->taxid].name;
|
||||||
strncpy(head,seq->SQ,10);
|
//strncpy(head,seq->SQ,10);
|
||||||
head[10]=0;
|
//head[10]=0;
|
||||||
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
|
//strncpy(tail,seq->SQ+seq->SQ_length-10,10);
|
||||||
tail[10]=0;
|
//tail[10]=0;
|
||||||
|
|
||||||
apatseq=ecoseq2apatseq(seq,apatseq,circular);
|
apatseq=ecoseq2apatseq(seq,apatseq,circular);
|
||||||
|
|
||||||
@ -610,18 +610,19 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
posj+=o2c->patlen;
|
posj+=o2c->patlen;
|
||||||
// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
|
// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
|
||||||
errj =apatseq->hiterr[1]->val[j];
|
errj = apatseq->hiterr[1]->val[j];
|
||||||
length = 0;
|
length = 0;
|
||||||
if (posj > posi)
|
if (posj > posi)
|
||||||
length=posj - posi - o1->patlen - o2->patlen;
|
length = posj - posi - o1->patlen - o2->patlen;
|
||||||
if (posj < posi)
|
if (posj < posi)
|
||||||
length= posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
||||||
if (length &&
|
if ((length>0) && // For when primers touch or overlap
|
||||||
(!lmin || (length >= lmin)) &&
|
(!lmin || (length >= lmin)) &&
|
||||||
(!lmax || (length <= lmax)))
|
(!lmax || (length <= lmax)))
|
||||||
|
{
|
||||||
printRepeat(seq,oligo1,oligo2,&tparm,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
|
printRepeat(seq,oligo1,oligo2,&tparm,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
|
||||||
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -666,15 +667,16 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
length = 0;
|
length = 0;
|
||||||
if (posj > posi)
|
if (posj > posi)
|
||||||
length=posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : suppress by <EC> */
|
length = posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
|
||||||
if (posj < posi)
|
if (posj < posi)
|
||||||
length= posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
||||||
|
if ((length>0) && // For when primers touch or overlap
|
||||||
if (length &&
|
|
||||||
(!lmin || (length >= lmin)) &&
|
(!lmin || (length >= lmin)) &&
|
||||||
(!lmax || (length <= lmax)))
|
(!lmax || (length <= lmax)))
|
||||||
|
{
|
||||||
printRepeat(seq,oligo1,oligo2,&tparm,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
|
printRepeat(seq,oligo1,oligo2,&tparm,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
|
||||||
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,13 @@
|
|||||||
MACHINE=MAC_OS_X
|
|
||||||
LIBPATH= -Llibapat -LlibecoPCR -Llibthermo
|
LIBPATH= -Llibapat -LlibecoPCR -Llibthermo
|
||||||
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
|
MAKEDEPEND = gcc -M $(CPPFLAGS) -o $*.d $<
|
||||||
|
|
||||||
CC=gcc
|
CC=gcc
|
||||||
CFLAGS= -W -Wall -O2 -g
|
CFLAGS= -O3 -w
|
||||||
|
|
||||||
default: all
|
default: all
|
||||||
|
|
||||||
%.o: %.c
|
%.o: %.c
|
||||||
$(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $<
|
$(CC) $(CFLAGS) -c -o $@ $<
|
||||||
|
|
||||||
%.P : %.c
|
%.P : %.c
|
||||||
$(MAKEDEPEND)
|
$(MAKEDEPEND)
|
||||||
|
@ -27,13 +27,9 @@
|
|||||||
#define PROTO 1 /* prototypes flag */
|
#define PROTO 1 /* prototypes flag */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MAC_OS_C
|
|
||||||
#define Vrai true /* TC boolean values */
|
|
||||||
#define Faux false /* */
|
|
||||||
#else
|
|
||||||
#define Vrai 0x1 /* bool values = TRUE */
|
#define Vrai 0x1 /* bool values = TRUE */
|
||||||
#define Faux 0x0 /* = FALSE */
|
#define Faux 0x0 /* = FALSE */
|
||||||
#endif
|
|
||||||
|
|
||||||
#define Nil NULL /* nil pointer */
|
#define Nil NULL /* nil pointer */
|
||||||
|
|
||||||
@ -42,28 +38,7 @@
|
|||||||
#define kBigUInt16 0xffff /* plus grand 16 bits ~signe */
|
#define kBigUInt16 0xffff /* plus grand 16 bits ~signe */
|
||||||
#define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */
|
#define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */
|
||||||
|
|
||||||
#ifdef MAC_OS_C
|
|
||||||
/* ==================================================== */
|
|
||||||
/* Types (for Macintosh ThinK C || MWerks) */
|
|
||||||
/* ==================================================== */
|
|
||||||
|
|
||||||
/* --- specific sizes --------- */
|
|
||||||
typedef long Int32; /* Int32 = 32 bits signe */
|
|
||||||
typedef unsigned long UInt32; /* UInt32 = 32 bits ~signe */
|
|
||||||
typedef short Int16; /* Int16 = 16 bits signe */
|
|
||||||
typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */
|
|
||||||
typedef char Int8; /* Int8 = 8 bits signe */
|
|
||||||
typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */
|
|
||||||
|
|
||||||
/* --- default types ---------- */
|
|
||||||
|
|
||||||
typedef Boolean Bool; /* booleen */
|
|
||||||
|
|
||||||
typedef long Int; /* 'natural' int (>= 32 bits) */
|
|
||||||
|
|
||||||
typedef void *Ptr; /* pointeur */
|
|
||||||
|
|
||||||
#elif ((defined SUN) || (defined SGI) || (defined UNIX))
|
|
||||||
/* ==================================================== */
|
/* ==================================================== */
|
||||||
/* Types (for Sun & Iris - 32 bits machines) */
|
/* Types (for Sun & Iris - 32 bits machines) */
|
||||||
/* ==================================================== */
|
/* ==================================================== */
|
||||||
@ -84,14 +59,7 @@ typedef int Int; /* 'natural' int (>= 32 bits) */
|
|||||||
|
|
||||||
typedef void *Ptr; /* pointeur */
|
typedef void *Ptr; /* pointeur */
|
||||||
|
|
||||||
#else
|
|
||||||
/* ==================================================== */
|
|
||||||
/* Types (for undefined machines) */
|
|
||||||
/* ==================================================== */
|
|
||||||
|
|
||||||
#error undefined MACHINE <please edit Gmach.h>
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ==================================================== */
|
/* ==================================================== */
|
||||||
/* special macro for prototypes */
|
/* special macro for prototypes */
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python2.7
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import gzip
|
import gzip
|
||||||
@ -7,11 +7,8 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import getopt
|
import getopt
|
||||||
|
|
||||||
try:
|
_dbenable=False
|
||||||
import psycopg2
|
|
||||||
_dbenable=True
|
|
||||||
except ImportError:
|
|
||||||
_dbenable=False
|
|
||||||
|
|
||||||
#####
|
#####
|
||||||
#
|
#
|
||||||
@ -221,56 +218,7 @@ def readTaxonomyDump(taxdir):
|
|||||||
|
|
||||||
return taxonomy,ranks,alternativeName,index
|
return taxonomy,ranks,alternativeName,index
|
||||||
|
|
||||||
def readTaxonomyDB(dbname):
|
|
||||||
connection = psycopg2.connect(database=dbname)
|
|
||||||
|
|
||||||
cursor = connection.cursor()
|
|
||||||
cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
|
|
||||||
taxonomy=[list(x) for x in cursor]
|
|
||||||
|
|
||||||
cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
|
|
||||||
ranks=cursor.fetchall()
|
|
||||||
ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks))))
|
|
||||||
|
|
||||||
print >>sys.stderr,"Sorting taxons..."
|
|
||||||
taxonomy.sort(taxonCmp)
|
|
||||||
|
|
||||||
print >>sys.stderr,"Indexing taxonomy..."
|
|
||||||
index = {}
|
|
||||||
for t in taxonomy:
|
|
||||||
index[t[0]]=bsearchTaxon(taxonomy, t[0])
|
|
||||||
|
|
||||||
print >>sys.stderr,"Indexing parent and rank..."
|
|
||||||
for t in taxonomy:
|
|
||||||
t[1]=ranks[t[1]]
|
|
||||||
try:
|
|
||||||
t[2]=index[t[2]]
|
|
||||||
except KeyError,e:
|
|
||||||
if t[2] is None and t[0]==1:
|
|
||||||
t[2]=index[t[0]]
|
|
||||||
else:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
cursor.execute("select taxid,name,category from ncbi_taxonomy.name")
|
|
||||||
|
|
||||||
alternativeName=[]
|
|
||||||
for taxid,name,classname in cursor:
|
|
||||||
alternativeName.append((name,classname,index[taxid]))
|
|
||||||
if classname == 'scientific name':
|
|
||||||
taxonomy[index[taxid]].append(name)
|
|
||||||
|
|
||||||
cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")
|
|
||||||
|
|
||||||
print >>sys.stderr,"Adding taxid alias..."
|
|
||||||
for taxid,current in cursor:
|
|
||||||
if current is not None:
|
|
||||||
index[taxid]=index[current]
|
|
||||||
else:
|
|
||||||
index[taxid]=None
|
|
||||||
|
|
||||||
|
|
||||||
return taxonomy,ranks,alternativeName,index
|
|
||||||
|
|
||||||
#####
|
#####
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
@ -585,10 +533,9 @@ def ecoParseOptions(arguments):
|
|||||||
}
|
}
|
||||||
|
|
||||||
o,filenames = getopt.getopt(arguments,
|
o,filenames = getopt.getopt(arguments,
|
||||||
'ht:T:n:gfe',
|
'ht:n:gfe',
|
||||||
['help',
|
['help',
|
||||||
'taxonomy=',
|
'taxonomy=',
|
||||||
'taxonomy_db=',
|
|
||||||
'name=',
|
'name=',
|
||||||
'genbank',
|
'genbank',
|
||||||
'fasta',
|
'fasta',
|
||||||
@ -601,9 +548,6 @@ def ecoParseOptions(arguments):
|
|||||||
elif name in ('-t','--taxonomy'):
|
elif name in ('-t','--taxonomy'):
|
||||||
opt['taxmod']='dump'
|
opt['taxmod']='dump'
|
||||||
opt['taxdir']=value
|
opt['taxdir']=value
|
||||||
elif name in ('-T','--taxonomy_db'):
|
|
||||||
opt['taxmod']='db'
|
|
||||||
opt['taxdb']=value
|
|
||||||
elif name in ('-n','--name'):
|
elif name in ('-n','--name'):
|
||||||
opt['prefix']=value
|
opt['prefix']=value
|
||||||
elif name in ('-g','--genbank'):
|
elif name in ('-g','--genbank'):
|
||||||
@ -622,6 +566,7 @@ def ecoParseOptions(arguments):
|
|||||||
|
|
||||||
return opt,filenames
|
return opt,filenames
|
||||||
|
|
||||||
|
|
||||||
def printHelp():
|
def printHelp():
|
||||||
print "-----------------------------------"
|
print "-----------------------------------"
|
||||||
print " ecoPCRFormat.py"
|
print " ecoPCRFormat.py"
|
||||||
@ -641,11 +586,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
opt,filenames = ecoParseOptions(sys.argv[1:])
|
opt,filenames = ecoParseOptions(sys.argv[1:])
|
||||||
|
|
||||||
if opt['taxmod']=='dump':
|
taxonomy = readTaxonomyDump(opt['taxdir'])
|
||||||
taxonomy = readTaxonomyDump(opt['taxdir'])
|
|
||||||
elif opt['taxmod']=='db':
|
|
||||||
taxonomy = readTaxonomyDB(opt['taxdb'])
|
|
||||||
|
|
||||||
|
|
||||||
ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
|
ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user