Add a -x option to the ecoTag.py script

This commit is contained in:
2008-04-10 10:00:11 +00:00
parent b7e93c475c
commit 2fbb894412

View File

@ -52,6 +52,13 @@ def addSearchOptions(optionManager):
default='fasta35',
help="Program used for search similarity.")
optionManager.add_option('-x','--explain',
action='store',dest='explain',
type="string",
default=None,
help="Add in the output CD (complementary data) record "
"to explain identification decision")
def ssearchPropsFilter(ssearchresult,shape=2.0,coverage=0.9):
try:
@ -67,6 +74,24 @@ def ssearchPropsFilter(ssearchresult,shape=2.0,coverage=0.9):
return fprops
def count(data):
rep = {}
for x in data:
if isinstance(x, (list,tuple)):
k = x[0]
if len(x) > 1:
v = [x[1]]
default=[]
else:
v = 1
default=0
else:
k=x
v=1
default=0
rep[k]=rep.get(k,default)+v
return rep
if __name__=='__main__':
optionParser = getOptionManager([addSearchOptions],
@ -76,10 +101,13 @@ if __name__=='__main__':
(options, entries) = optionParser()
assert options.program in ('fasta35','ssearch35')
taxonomy = Taxonomy(options.taxonomy)
db = fastaIterator(options.database)
taxonlink = {}
rankid = taxonomy.findRankByName(options.explain)
for seq in db:
id = seq.id[0:46]
@ -132,8 +160,50 @@ if __name__=='__main__':
data =(match.query,good[0]['identity'],good[-1]['identity'],good[0]['identity']**options.shape,len(good),lca,scname,rank,order,orname,family,faname,genus,gnname,species,spname)
data =('ID',match.query,good[0]['identity'],good[-1]['identity'],'%4.3f' %(good[0]['identity']**options.shape),len(good),lca,scname,rank,order,orname,family,faname,genus,gnname,species,spname)
else:
data =(match.query,'--','--','--',0,1,'root','no rank','-1','--','-1','--','-1','--','-1','--')
data =('UK',match.query,'--','--','--',0,1,'root','no rank','-1','--','-1','--','-1','--','-1','--')
print '\t'.join([str(x) for x in data])
print '\t'.join([str(x) for x in data])
if good and rankid is not None:
splist=count((taxonomy.getTaxonAtRank(x, rankid),y) for x,y in ((taxonlink[p['ac']],p['identity']) for p in good))
data=[]
for taxon in splist:
species=taxonomy.getSpecies(taxon)
countt = len(splist[taxon])
mini = min(splist[taxon])
maxi = max(splist[taxon])
if species is not None:
spname = taxonomy.getScientificName(species)
else:
spname = '--'
species= '-1'
genus = taxonomy.getGenus(taxon)
if genus is not None:
gnname = taxonomy.getScientificName(genus)
else:
gnname = '--'
genus= '-1'
order = taxonomy.getOrder(taxon)
if order is not None:
orname = taxonomy.getScientificName(order)
else:
orname = '--'
order= '-1'
family = taxonomy.getFamily(taxon)
if family is not None:
faname = taxonomy.getScientificName(family)
else:
faname = '--'
family= '-1'
data.append(('CD',match.query,maxi,mini,'--',countt,taxon,spname,options.explain,order,orname,family,faname,genus,gnname,species,spname))
data.sort(lambda x,y:cmp(y[2], x[2]))
for d in data:
print '\t'.join([str(x) for x in d])