Add new algorithm in EcoTag to reduce impact of database errors
This commit is contained in:
@ -214,7 +214,7 @@ if __name__=='__main__':
|
|||||||
|
|
||||||
if best[0] is not None:
|
if best[0] is not None:
|
||||||
taxlist = set(taxonlink[p[0].id] for p in match)
|
taxlist = set(taxonlink[p[0].id] for p in match)
|
||||||
lca = taxonomy.lastCommonTaxon(*tuple(taxlist))
|
lca = taxonomy.betterCommonTaxon(0.2,*tuple(taxlist))
|
||||||
scname = taxonomy.getScientificName(lca)
|
scname = taxonomy.getScientificName(lca)
|
||||||
rank = taxonomy.getRank(lca)
|
rank = taxonomy.getRank(lca)
|
||||||
|
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from itertools import count,imap
|
from itertools import count,imap,combinations
|
||||||
|
|
||||||
from obitools.ecopcr import EcoPCRDBFile
|
from obitools.ecopcr import EcoPCRDBFile
|
||||||
from obitools.utils import universalOpen
|
from obitools.utils import universalOpen
|
||||||
from obitools.utils import ColumnFile
|
from obitools.utils import ColumnFile
|
||||||
|
import math
|
||||||
|
|
||||||
class Taxonomy(object):
|
class Taxonomy(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -116,11 +117,21 @@ class Taxonomy(object):
|
|||||||
|
|
||||||
return ancetre
|
return ancetre
|
||||||
|
|
||||||
def betterCommonTaxon(self,error=1,*taxids):
|
def depth(self,taxid):
|
||||||
|
return len([x for x in self.parentalTreeIterator(taxid)])
|
||||||
|
|
||||||
|
def betterCommonTaxon(self,error=0.2,*taxids):
|
||||||
|
|
||||||
|
error = math.floor(len(taxids) * error)
|
||||||
|
|
||||||
|
if error >= 1:
|
||||||
|
possible = [self.lastCommonTaxon(x) for x in combinations(taxids,len(taxids)-error)]
|
||||||
|
possible.sort(cmp=lambda t1,t2: cmp(self.depth(t2),self.depth(t1)))
|
||||||
|
lca=possible[0]
|
||||||
|
else:
|
||||||
lca = self.lastCommonTaxon(*taxids)
|
lca = self.lastCommonTaxon(*taxids)
|
||||||
idx = self._index[lca]
|
|
||||||
sublca = [t[0] for t in self._taxonomy if t[2]==idx]
|
return lca
|
||||||
return sublca
|
|
||||||
|
|
||||||
|
|
||||||
def getPreferedName(self,taxid):
|
def getPreferedName(self,taxid):
|
||||||
|
Reference in New Issue
Block a user