Change to output to annotated fasta. Older output mode is available with -T option

This commit is contained in:
2009-10-21 21:27:02 +00:00
parent 208347fccf
commit d864bcac04

View File

@ -1,34 +1,24 @@
#!/usr/local/bin/python #!/usr/local/bin/python
from obitools.fasta import fastaNucIterator from obitools.fasta import fastaNucIterator,formatFasta
from obitools.align.ssearch import ssearchIterator from obitools.align.ssearch import ssearchIterator
from obitools.utils.bioseq import uniqSequence,sortSequence from obitools.utils.bioseq import uniqSequence,sortSequence
from obitools.ecopcr.taxonomy import EcoTaxonomyDB from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDatabase
import sys
import re
from logging import root,DEBUG
from obitools.options import getOptionManager from obitools.options import getOptionManager
def addSearchOptions(optionManager): def addSearchOptions(optionManager):
optionManager.add_option('-d','--database', optionManager.add_option('-R','--ref-database',
action="store", dest="database", action="store", dest="database",
metavar="<FILENAME>", metavar="<FILENAME>",
type="string", type="string",
help="fasta file containing reference" help="fasta file containing reference"
"sequences") "sequences")
optionManager.add_option('-t','--taxonomy',
action="store", dest="taxonomy",
metavar="<FILENAME>",
type="string",
help="ecoPCR taxonomy Database "
"name")
optionManager.add_option('-s','--shape', optionManager.add_option('-s','--shape',
action="store", dest="shape", action="store", dest="shape",
metavar="shapeness", metavar="shapeness",
@ -73,6 +63,11 @@ def addSearchOptions(optionManager):
default=False, default=False,
help='Apply uniq filter on sequences before identification') help='Apply uniq filter on sequences before identification')
optionManager.add_option('-T','--table',
action='store_true',dest='table',
default=False,
help='Write results in a tabular format')
optionManager.add_option('-S','--sort', optionManager.add_option('-S','--sort',
action='store',dest='sort', action='store',dest='sort',
type='string', type='string',
@ -95,7 +90,7 @@ def addSearchOptions(optionManager):
def ssearchPropsFilter(ssearchresult,shape=2.0,coverage=0.9,dbcoverage=0.9): def ssearchPropsFilter(ssearchresult,shape=2.0,coverage=0.9,dbcoverage=0.9):
try: try:
idmax = ssearchresult.props[0]['identity'] idmax = ssearchresult.props[0]['identity']
except AttributeError,e: except AttributeError:
return None return None
idthresold = idmax ** shape idthresold = idmax ** shape
@ -127,16 +122,19 @@ def count(data):
if __name__=='__main__': if __name__=='__main__':
optionParser = getOptionManager([addSearchOptions], optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions],
entryIterator=fastaNucIterator entryIterator=fastaNucIterator
) )
(options, entries) = optionParser() (options, entries) = optionParser()
assert options.program in ('fasta35','ssearch35') assert options.program in ('fasta35','ssearch35')
taxonomy = EcoTaxonomyDB(options.taxonomy) if options.explain is not None:
options.table=True
taxonomy = loadTaxonomyDatabase(options)
db = fastaNucIterator(options.database) db = fastaNucIterator(options.database)
taxonlink = {} taxonlink = {}
@ -208,7 +206,35 @@ if __name__=='__main__':
if options.sequence: if options.sequence:
data.append(seq) data.append(seq)
print '\t'.join([str(x) for x in data])
if options.table:
print '\t'.join([str(x) for x in data])
else:
seq['id_status']=data[0]=='ID'
seq['count']=data[6]
seq['match_count']=data[7]
seq['taxid']=data[8]
seq['scientific_name']=data[9]
seq['rank']=data[10]
if seq['id_status']:
seq['better_match']=data[2]
seq['better_identity']=data[3]
seq['worst_identity']=data[4]
seq['lower_id_limit']=float(data[5])
if int(data[11])>=0:
seq['order']=data[11]
seq['order_name']=data[12]
if int(data[13])>=0:
seq['family']=data[13]
seq['family_name']=data[14]
if int(data[15])>=0:
seq['genus']=data[15]
seq['genus_name']=data[16]
if int(data[17])>=0:
seq['species']=data[17]
seq['species_name']=data[18]
print formatFasta(seq)
if good and rankid is not None: if good and rankid is not None:
splist=count((taxonomy.getTaxonAtRank(x, rankid),y) splist=count((taxonomy.getTaxonAtRank(x, rankid),y)