|
|
|
@ -1,4 +1,4 @@
|
|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
#!/usr/bin/env python2.7
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
import gzip
|
|
|
|
@ -7,11 +7,8 @@ import sys
|
|
|
|
|
import time
|
|
|
|
|
import getopt
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
import psycopg2
|
|
|
|
|
_dbenable=True
|
|
|
|
|
except ImportError:
|
|
|
|
|
_dbenable=False
|
|
|
|
|
_dbenable=False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#####
|
|
|
|
|
#
|
|
|
|
@ -221,56 +218,7 @@ def readTaxonomyDump(taxdir):
|
|
|
|
|
|
|
|
|
|
return taxonomy,ranks,alternativeName,index
|
|
|
|
|
|
|
|
|
|
def readTaxonomyDB(dbname):
|
|
|
|
|
connection = psycopg2.connect(database=dbname)
|
|
|
|
|
|
|
|
|
|
cursor = connection.cursor()
|
|
|
|
|
cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
|
|
|
|
|
taxonomy=[list(x) for x in cursor]
|
|
|
|
|
|
|
|
|
|
cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
|
|
|
|
|
ranks=cursor.fetchall()
|
|
|
|
|
ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks))))
|
|
|
|
|
|
|
|
|
|
print >>sys.stderr,"Sorting taxons..."
|
|
|
|
|
taxonomy.sort(taxonCmp)
|
|
|
|
|
|
|
|
|
|
print >>sys.stderr,"Indexing taxonomy..."
|
|
|
|
|
index = {}
|
|
|
|
|
for t in taxonomy:
|
|
|
|
|
index[t[0]]=bsearchTaxon(taxonomy, t[0])
|
|
|
|
|
|
|
|
|
|
print >>sys.stderr,"Indexing parent and rank..."
|
|
|
|
|
for t in taxonomy:
|
|
|
|
|
t[1]=ranks[t[1]]
|
|
|
|
|
try:
|
|
|
|
|
t[2]=index[t[2]]
|
|
|
|
|
except KeyError,e:
|
|
|
|
|
if t[2] is None and t[0]==1:
|
|
|
|
|
t[2]=index[t[0]]
|
|
|
|
|
else:
|
|
|
|
|
raise e
|
|
|
|
|
|
|
|
|
|
cursor.execute("select taxid,name,category from ncbi_taxonomy.name")
|
|
|
|
|
|
|
|
|
|
alternativeName=[]
|
|
|
|
|
for taxid,name,classname in cursor:
|
|
|
|
|
alternativeName.append((name,classname,index[taxid]))
|
|
|
|
|
if classname == 'scientific name':
|
|
|
|
|
taxonomy[index[taxid]].append(name)
|
|
|
|
|
|
|
|
|
|
cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")
|
|
|
|
|
|
|
|
|
|
print >>sys.stderr,"Adding taxid alias..."
|
|
|
|
|
for taxid,current in cursor:
|
|
|
|
|
if current is not None:
|
|
|
|
|
index[taxid]=index[current]
|
|
|
|
|
else:
|
|
|
|
|
index[taxid]=None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return taxonomy,ranks,alternativeName,index
|
|
|
|
|
|
|
|
|
|
#####
|
|
|
|
|
#
|
|
|
|
|
#
|
|
|
|
@ -585,10 +533,9 @@ def ecoParseOptions(arguments):
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
o,filenames = getopt.getopt(arguments,
|
|
|
|
|
'ht:T:n:gfe',
|
|
|
|
|
'ht:n:gfe',
|
|
|
|
|
['help',
|
|
|
|
|
'taxonomy=',
|
|
|
|
|
'taxonomy_db=',
|
|
|
|
|
'name=',
|
|
|
|
|
'genbank',
|
|
|
|
|
'fasta',
|
|
|
|
@ -601,9 +548,6 @@ def ecoParseOptions(arguments):
|
|
|
|
|
elif name in ('-t','--taxonomy'):
|
|
|
|
|
opt['taxmod']='dump'
|
|
|
|
|
opt['taxdir']=value
|
|
|
|
|
elif name in ('-T','--taxonomy_db'):
|
|
|
|
|
opt['taxmod']='db'
|
|
|
|
|
opt['taxdb']=value
|
|
|
|
|
elif name in ('-n','--name'):
|
|
|
|
|
opt['prefix']=value
|
|
|
|
|
elif name in ('-g','--genbank'):
|
|
|
|
@ -622,6 +566,7 @@ def ecoParseOptions(arguments):
|
|
|
|
|
|
|
|
|
|
return opt,filenames
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def printHelp():
|
|
|
|
|
print "-----------------------------------"
|
|
|
|
|
print " ecoPCRFormat.py"
|
|
|
|
@ -641,11 +586,7 @@ if __name__ == '__main__':
|
|
|
|
|
|
|
|
|
|
opt,filenames = ecoParseOptions(sys.argv[1:])
|
|
|
|
|
|
|
|
|
|
if opt['taxmod']=='dump':
|
|
|
|
|
taxonomy = readTaxonomyDump(opt['taxdir'])
|
|
|
|
|
elif opt['taxmod']=='db':
|
|
|
|
|
taxonomy = readTaxonomyDB(opt['taxdb'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
taxonomy = readTaxonomyDump(opt['taxdir'])
|
|
|
|
|
|
|
|
|
|
ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
|
|
|
|
|
|
|
|
|
|