Change to output to annotated fasta. Older output mode is available with -T option

This commit is contained in:
2009-10-21 21:27:02 +00:00
parent 208347fccf
commit d864bcac04

View File

@ -1,34 +1,24 @@
#!/usr/local/bin/python
from obitools.fasta import fastaNucIterator
from obitools.fasta import fastaNucIterator,formatFasta
from obitools.align.ssearch import ssearchIterator
from obitools.utils.bioseq import uniqSequence,sortSequence
from obitools.ecopcr.taxonomy import EcoTaxonomyDB
from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDatabase
import sys
import re
from logging import root,DEBUG
from obitools.options import getOptionManager
def addSearchOptions(optionManager):
optionManager.add_option('-d','--database',
optionManager.add_option('-R','--ref-database',
action="store", dest="database",
metavar="<FILENAME>",
type="string",
help="fasta file containing reference"
"sequences")
optionManager.add_option('-t','--taxonomy',
action="store", dest="taxonomy",
metavar="<FILENAME>",
type="string",
help="ecoPCR taxonomy Database "
"name")
optionManager.add_option('-s','--shape',
action="store", dest="shape",
metavar="shapeness",
@ -73,6 +63,11 @@ def addSearchOptions(optionManager):
default=False,
help='Apply uniq filter on sequences before identification')
optionManager.add_option('-T','--table',
action='store_true',dest='table',
default=False,
help='Write results in a tabular format')
optionManager.add_option('-S','--sort',
action='store',dest='sort',
type='string',
@ -95,7 +90,7 @@ def addSearchOptions(optionManager):
def ssearchPropsFilter(ssearchresult,shape=2.0,coverage=0.9,dbcoverage=0.9):
try:
idmax = ssearchresult.props[0]['identity']
except AttributeError,e:
except AttributeError:
return None
idthresold = idmax ** shape
@ -127,7 +122,7 @@ def count(data):
if __name__=='__main__':
optionParser = getOptionManager([addSearchOptions],
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions],
entryIterator=fastaNucIterator
)
@ -135,8 +130,11 @@ if __name__=='__main__':
assert options.program in ('fasta35','ssearch35')
if options.explain is not None:
options.table=True
taxonomy = loadTaxonomyDatabase(options)
taxonomy = EcoTaxonomyDB(options.taxonomy)
db = fastaNucIterator(options.database)
taxonlink = {}
@ -208,7 +206,35 @@ if __name__=='__main__':
if options.sequence:
data.append(seq)
if options.table:
print '\t'.join([str(x) for x in data])
else:
seq['id_status']=data[0]=='ID'
seq['count']=data[6]
seq['match_count']=data[7]
seq['taxid']=data[8]
seq['scientific_name']=data[9]
seq['rank']=data[10]
if seq['id_status']:
seq['better_match']=data[2]
seq['better_identity']=data[3]
seq['worst_identity']=data[4]
seq['lower_id_limit']=float(data[5])
if int(data[11])>=0:
seq['order']=data[11]
seq['order_name']=data[12]
if int(data[13])>=0:
seq['family']=data[13]
seq['family_name']=data[14]
if int(data[15])>=0:
seq['genus']=data[15]
seq['genus_name']=data[16]
if int(data[17])>=0:
seq['species']=data[17]
seq['species_name']=data[18]
print formatFasta(seq)
if good and rankid is not None:
splist=count((taxonomy.getTaxonAtRank(x, rankid),y)