Add new algorithm in EcoTag to reduce impact of database errors
This commit is contained in:
@ -214,7 +214,7 @@ if __name__=='__main__':
|
||||
|
||||
if best[0] is not None:
|
||||
taxlist = set(taxonlink[p[0].id] for p in match)
|
||||
lca = taxonomy.lastCommonTaxon(*tuple(taxlist))
|
||||
lca = taxonomy.betterCommonTaxon(0.2,*tuple(taxlist))
|
||||
scname = taxonomy.getScientificName(lca)
|
||||
rank = taxonomy.getRank(lca)
|
||||
|
||||
|
@ -1,11 +1,12 @@
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from itertools import count,imap
|
||||
from itertools import count,imap,combinations
|
||||
|
||||
from obitools.ecopcr import EcoPCRDBFile
|
||||
from obitools.utils import universalOpen
|
||||
from obitools.utils import ColumnFile
|
||||
import math
|
||||
|
||||
class Taxonomy(object):
|
||||
def __init__(self):
|
||||
@ -116,11 +117,21 @@ class Taxonomy(object):
|
||||
|
||||
return ancetre
|
||||
|
||||
# Earlier draft of betterCommonTaxon: computes the last common taxon of
# `taxids`, looks up its position in self._index, and returns the list of
# first fields (t[0]) of every self._taxonomy entry whose third field (t[2])
# equals that position.
# NOTE(review): `error` is accepted but never used here, and the result is a
# list rather than a single taxon. Presumably t[0] is a taxid and t[2] a
# parent index (i.e. this lists the direct children of the LCA) - confirm
# against the taxonomy record layout.
# NOTE(review): a second definition with the same name follows further down;
# this one looks like the removed side of the diff hunk - confirm before
# relying on it.
def betterCommonTaxon(self,error=1,*taxids):
|
||||
lca = self.lastCommonTaxon(*taxids)
|
||||
idx = self._index[lca]
|
||||
sublca = [t[0] for t in self._taxonomy if t[2]==idx]
|
||||
return sublca
|
||||
def depth(self, taxid):
    """Return the number of items yielded by parentalTreeIterator(taxid).

    The count is used as a depth measure for `taxid` in the taxonomy tree:
    the more items the parental iterator yields, the deeper the taxon.
    NOTE(review): whether the count includes `taxid` itself depends on
    parentalTreeIterator, which is not visible here - confirm.
    """
    # Count lazily instead of materialising a throwaway list.
    return sum(1 for _ in self.parentalTreeIterator(taxid))
|
||||
|
||||
def betterCommonTaxon(self, error=0.2, *taxids):
    """Return a common taxon of `taxids` that tolerates a few outliers.

    `error` is the fraction of `taxids` that may be ignored. The number of
    ignorable taxids is floor(len(taxids) * error). When that number is at
    least one, the last common taxon is computed for every subset obtained
    by leaving that many taxids out, and the deepest result (according to
    self.depth) is returned. Otherwise the plain last common taxon of all
    `taxids` is returned.

    The number of subsets grows combinatorially with len(taxids), so this
    is intended for small taxid lists.
    """
    total = len(taxids)

    # math.floor returns a float on Python 2, but combinations() requires
    # an integer subset size.
    tolerated = int(math.floor(total * error))

    # Always keep at least one taxid, so that error >= 1.0 cannot produce
    # an empty (or negative-sized) subset.
    tolerated = min(tolerated, total - 1)

    if tolerated < 1:
        return self.lastCommonTaxon(*taxids)

    # Each subset is a tuple of taxids and must be unpacked, because
    # lastCommonTaxon takes the taxids as separate positional arguments.
    candidates = [self.lastCommonTaxon(*subset)
                  for subset in combinations(taxids, total - tolerated)]

    # max() returns the first candidate of maximal depth, which is the
    # element a stable descending sort by depth would place first.
    return max(candidates, key=self.depth)
|
||||
|
||||
|
||||
def getPreferedName(self,taxid):
|
||||
|
Reference in New Issue
Block a user