Add option to ecoPCRFormat to deal with an obischema db as taxonomy source
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@158 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -7,6 +7,12 @@ import sys
|
||||
import time
|
||||
import getopt
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
_dbenable=True
|
||||
except ImportError:
|
||||
_dbenable=False
|
||||
|
||||
#####
|
||||
#
|
||||
#
|
||||
@ -215,6 +221,55 @@ def readTaxonomyDump(taxdir):
|
||||
|
||||
return taxonomy,ranks,alternativeName,index
|
||||
|
||||
def readTaxonomyDB(dbname):
|
||||
connection = psycopg2.connect(database=dbname)
|
||||
|
||||
cursor = connection.cursor()
|
||||
cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
|
||||
taxonomy=[list(x) for x in cursor]
|
||||
|
||||
cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
|
||||
ranks=cursor.fetchall()
|
||||
ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks))))
|
||||
|
||||
print >>sys.stderr,"Sorting taxons..."
|
||||
taxonomy.sort(taxonCmp)
|
||||
|
||||
print >>sys.stderr,"Indexing taxonomy..."
|
||||
index = {}
|
||||
for t in taxonomy:
|
||||
index[t[0]]=bsearchTaxon(taxonomy, t[0])
|
||||
|
||||
print >>sys.stderr,"Indexing parent and rank..."
|
||||
for t in taxonomy:
|
||||
t[1]=ranks[t[1]]
|
||||
try:
|
||||
t[2]=index[t[2]]
|
||||
except KeyError,e:
|
||||
if t[2] is None and t[0]==1:
|
||||
t[2]=index[t[0]]
|
||||
else:
|
||||
raise e
|
||||
|
||||
cursor.execute("select taxid,name,category from ncbi_taxonomy.name")
|
||||
|
||||
alternativeName=[]
|
||||
for taxid,name,classname in cursor:
|
||||
alternativeName.append((name,classname,index[taxid]))
|
||||
if classname == 'scientific name':
|
||||
taxonomy[index[taxid]].append(name)
|
||||
|
||||
cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")
|
||||
|
||||
print >>sys.stderr,"Adding taxid alias..."
|
||||
for taxid,current in cursor:
|
||||
if current is not None:
|
||||
index[taxid]=index[current]
|
||||
else:
|
||||
index[taxid]=None
|
||||
|
||||
|
||||
return taxonomy,ranks,alternativeName,index
|
||||
|
||||
#####
|
||||
#
|
||||
@ -293,11 +348,14 @@ def emblEntryParser(entry):
|
||||
|
||||
######################
|
||||
|
||||
_fastaSplit=re.compile(';\W*')
|
||||
|
||||
def parseFasta(seq):
|
||||
seq=seq.split('\n')
|
||||
title = seq[0].strip()[1:].split(None,1)
|
||||
id=title[0]
|
||||
if len(title) == 2:
|
||||
field = title[1].split('; ')
|
||||
field = _fastaSplit.split(title[1])
|
||||
else:
|
||||
field=[]
|
||||
info = dict(x.split('=',1) for x in field if '=' in x)
|
||||
@ -527,9 +585,10 @@ def ecoParseOptions(arguments):
|
||||
}
|
||||
|
||||
o,filenames = getopt.getopt(arguments,
|
||||
'ht:n:gfe',
|
||||
'ht:T:n:gfe',
|
||||
['help',
|
||||
'taxonomy=',
|
||||
'taxonomy_db=',
|
||||
'name=',
|
||||
'genbank',
|
||||
'fasta',
|
||||
@ -540,7 +599,11 @@ def ecoParseOptions(arguments):
|
||||
printHelp()
|
||||
exit()
|
||||
elif name in ('-t','--taxonomy'):
|
||||
opt['taxmod']='dump'
|
||||
opt['taxdir']=value
|
||||
elif name in ('-T','--taxonomy_db'):
|
||||
opt['taxmod']='db'
|
||||
opt['taxdb']=value
|
||||
elif name in ('-n','--name'):
|
||||
opt['prefix']=value
|
||||
elif name in ('-g','--genbank'):
|
||||
@ -578,7 +641,11 @@ if __name__ == '__main__':
|
||||
|
||||
opt,filenames = ecoParseOptions(sys.argv[1:])
|
||||
|
||||
if opt['taxmod']=='dump':
|
||||
taxonomy = readTaxonomyDump(opt['taxdir'])
|
||||
elif opt['taxmod']=='db':
|
||||
taxonomy = readTaxonomyDB(opt['taxdb'])
|
||||
|
||||
|
||||
ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
|
||||
|
||||
|
Reference in New Issue
Block a user