Taxonomy: new functions and improvements
This commit is contained in:
@ -25,8 +25,13 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
ecotx_t* taxon
|
||||
|
||||
|
||||
struct ecorankidx_t :
|
||||
int32_t count
|
||||
char** label
|
||||
|
||||
|
||||
struct OBIDMS_taxonomy_t :
|
||||
# ecorankidx_t* ranks
|
||||
ecorankidx_t* ranks
|
||||
# econameidx_t* names
|
||||
ecotxidx_t* taxa
|
||||
|
||||
@ -63,3 +68,5 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
|
||||
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
|
||||
|
||||
const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks)
|
||||
|
@ -0,0 +1,2 @@
|
||||
from .taxo import Taxonomy # @UnresolvedImport
|
||||
from .taxo import Taxon # @UnresolvedImport
|
||||
|
@ -8,15 +8,23 @@ from ..object cimport OBIWrapper
|
||||
|
||||
|
||||
cdef class Taxonomy(OBIWrapper) :
|
||||
cdef str _name # TODO keep as bytes?
|
||||
cdef DMS _dms
|
||||
cdef bytes _name
|
||||
cdef DMS _dms
|
||||
cdef list _ranks
|
||||
|
||||
cdef inline OBIDMS_taxonomy_p pointer(self)
|
||||
|
||||
cpdef get_taxon_by_idx(self, int idx)
|
||||
cpdef write(self, str prefix)
|
||||
cpdef Taxon get_taxon_by_idx(self, int idx)
|
||||
cpdef Taxon get_taxon_by_taxid(self, int taxid)
|
||||
cpdef write(self, object prefix)
|
||||
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=*)
|
||||
|
||||
cpdef object get_species(self, int taxid)
|
||||
cpdef object get_genus(self, int taxid)
|
||||
cpdef object get_family(self, int taxid)
|
||||
cpdef bytes get_scientific_name(self, int taxid)
|
||||
cpdef bytes get_rank(self, int taxid)
|
||||
|
||||
|
||||
cdef class Taxon :
|
||||
cdef ecotx_t* _pointer
|
||||
cdef ecotx_t* _pointer
|
||||
cdef Taxonomy _tax
|
@ -1,6 +1,6 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.utils cimport str2bytes, bytes2str, tobytes
|
||||
from obitools3.utils cimport str2bytes, bytes2str, tobytes, tostr
|
||||
|
||||
from ..capi.obitaxonomy cimport obi_read_taxonomy, \
|
||||
obi_read_taxdump, \
|
||||
@ -9,6 +9,10 @@ from ..capi.obitaxonomy cimport obi_read_taxonomy, \
|
||||
obi_taxo_get_taxon_with_taxid, \
|
||||
obi_taxo_add_local_taxon, \
|
||||
obi_taxo_add_preferred_name_with_taxon, \
|
||||
obi_taxo_rank_index_to_label, \
|
||||
obi_taxo_get_species, \
|
||||
obi_taxo_get_genus, \
|
||||
obi_taxo_get_family, \
|
||||
ecotx_t
|
||||
|
||||
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
||||
@ -22,57 +26,138 @@ cdef class Taxonomy(OBIWrapper) :
|
||||
|
||||
|
||||
@staticmethod
|
||||
def open(DMS dms, str name, bint taxdump=False) :
|
||||
def open(DMS dms, object name) :
|
||||
|
||||
cdef void* pointer
|
||||
cdef Taxonomy taxo
|
||||
|
||||
if taxdump :
|
||||
pointer = <void*>obi_read_taxdump(tobytes(name))
|
||||
else :
|
||||
pointer = <void*>obi_read_taxonomy(dms.pointer(), tobytes(name), True) # TODO discuss
|
||||
# TODO if not found in DMS, try to import?
|
||||
|
||||
|
||||
pointer = <void*>obi_read_taxonomy(dms.pointer(), tobytes(name), True)
|
||||
if pointer == NULL :
|
||||
raise RuntimeError("Error : Cannot read taxonomy %s"
|
||||
% name)
|
||||
% tostr(name))
|
||||
|
||||
taxo = OBIWrapper.new_wrapper(Taxonomy, pointer)
|
||||
|
||||
dms.register(taxo)
|
||||
|
||||
taxo._dms = dms
|
||||
taxo._name = tobytes(name)
|
||||
|
||||
taxo._ranks = []
|
||||
for r in range((<OBIDMS_taxonomy_p>pointer).ranks.count) :
|
||||
taxo._ranks.append(obi_taxo_rank_index_to_label(r, (<OBIDMS_taxonomy_p>pointer).ranks))
|
||||
|
||||
return taxo
|
||||
|
||||
|
||||
@staticmethod
|
||||
def open_taxdump(DMS dms, object name) :
|
||||
|
||||
cdef void* pointer
|
||||
cdef Taxonomy taxo
|
||||
|
||||
pointer = <void*>obi_read_taxdump(tobytes(name))
|
||||
if pointer == NULL :
|
||||
raise RuntimeError("Error : Cannot read taxonomy %s"
|
||||
% tostr(name))
|
||||
|
||||
taxo = OBIWrapper.new_wrapper(Taxonomy, pointer)
|
||||
|
||||
dms.register(taxo)
|
||||
|
||||
taxo._dms = dms
|
||||
taxo._name = name
|
||||
taxo._name = tobytes(name)
|
||||
|
||||
taxo._ranks = []
|
||||
for r in range((<OBIDMS_taxonomy_p>pointer).ranks.count) :
|
||||
taxo._ranks.append(obi_taxo_rank_index_to_label(r, (<OBIDMS_taxonomy_p>pointer).ranks))
|
||||
|
||||
return taxo
|
||||
|
||||
|
||||
def __getitem__(self, object ref):
|
||||
|
||||
cdef ecotx_t* taxon_p
|
||||
cdef object taxon_capsule
|
||||
|
||||
def __getitem__(self, object ref):
|
||||
if type(ref) == int :
|
||||
taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer(), ref)
|
||||
if taxon_p == NULL :
|
||||
raise Exception("Taxon not found")
|
||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||
return Taxon(taxon_capsule, self)
|
||||
return self.get_taxon_by_taxid(ref)
|
||||
else :
|
||||
raise Exception("Not implemented")
|
||||
|
||||
|
||||
cpdef get_taxon_by_idx(self, int idx):
|
||||
|
||||
cpdef Taxon get_taxon_by_taxid(self, int taxid):
    """
    Return a Taxon wrapping the entry that obi_taxo_get_taxon_with_taxid()
    finds for `taxid` in this taxonomy.

    Raises Exception when the lookup returns NULL.
    """
    cdef ecotx_t* found_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)

    if found_p == NULL:
        raise Exception("Error getting a taxon with given taxid", taxid)

    # The C pointer is handed to the Taxon constructor inside a PyCapsule
    # (no name, no destructor).
    return Taxon(PyCapsule_New(found_p, NULL, NULL), self)
|
||||
|
||||
|
||||
cpdef Taxon get_taxon_by_idx(self, int idx):
    """
    Return a Taxon wrapping the entry at position `idx` of the taxonomy's
    taxa array.

    Raises Exception when `idx` is negative or not smaller than the number
    of taxa (self.pointer().taxa.count).
    """
    cdef ecotx_t* taxa
    cdef ecotx_t* taxon_p
    cdef object taxon_capsule

    # The index is used for raw pointer arithmetic below, so both bounds
    # must be checked: a negative value would address memory located
    # before the start of the array.
    if idx < 0 or idx >= self.pointer().taxa.count :
        raise Exception("Error getting a taxon with given index: no taxid with this index", idx)

    taxa = self.pointer().taxa.taxon
    taxon_p = taxa + idx
    taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
    return Taxon(taxon_capsule, self)
|
||||
|
||||
|
||||
cpdef object get_species(self, int taxid):
    """
    Return the taxid of the taxon that obi_taxo_get_species() reports for
    the taxon identified by `taxid`, or None when that call returns NULL.

    Raises Exception when no taxon is found for `taxid`.
    """
    cdef ecotx_t* start_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
    cdef ecotx_t* species_p

    if start_p == NULL:
        raise Exception("Error getting a taxon with given taxid", taxid)

    species_p = obi_taxo_get_species(start_p, self.pointer())
    if species_p != NULL :
        return <int>(species_p.taxid)
    return None
|
||||
|
||||
|
||||
cpdef object get_genus(self, int taxid):
    """
    Return the taxid of the taxon that obi_taxo_get_genus() reports for
    the taxon identified by `taxid`, or None when that call returns NULL.

    Raises Exception when no taxon is found for `taxid`.
    """
    cdef ecotx_t* start_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
    cdef ecotx_t* genus_p

    if start_p == NULL:
        raise Exception("Error getting a taxon with given taxid", taxid)

    genus_p = obi_taxo_get_genus(start_p, self.pointer())
    if genus_p != NULL :
        return <int>(genus_p.taxid)
    return None
|
||||
|
||||
|
||||
cpdef object get_family(self, int taxid):
    """
    Return the taxid of the taxon that obi_taxo_get_family() reports for
    the taxon identified by `taxid`, or None when that call returns NULL.

    Raises Exception when no taxon is found for `taxid`.
    """
    cdef ecotx_t* start_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
    cdef ecotx_t* family_p

    if start_p == NULL:
        raise Exception("Error getting a taxon with given taxid", taxid)

    family_p = obi_taxo_get_family(start_p, self.pointer())
    if family_p != NULL :
        return <int>(family_p.taxid)
    return None
|
||||
|
||||
|
||||
cpdef bytes get_scientific_name(self, int taxid):
    """
    Return, as bytes, the `name` field of the taxon identified by `taxid`.

    Raises Exception when no taxon is found for `taxid`.
    """
    cdef ecotx_t* found_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
    cdef bytes sci_name

    if found_p == NULL:
        raise Exception("Error getting a taxon with given taxid", taxid)

    # Cython copies the C string into a Python bytes object here.
    sci_name = found_p.name
    return sci_name
|
||||
|
||||
|
||||
cpdef bytes get_rank(self, int taxid):
    """
    Return the entry of self._ranks selected by the `rank` field of the
    taxon identified by `taxid`.

    Raises Exception when no taxon is found for `taxid`.
    """
    cdef ecotx_t* found_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)

    if found_p == NULL:
        raise Exception("Error getting a taxon with given taxid", taxid)

    # The taxon stores a rank index; self._ranks holds the label for each index.
    return self._ranks[found_p.rank]
|
||||
|
||||
|
||||
def __len__(self):
    """Return the `count` field of the taxonomy's taxa index."""
    cdef OBIDMS_taxonomy_p tax_p = self.pointer()
    return tax_p.taxa.count
|
||||
@ -94,7 +179,7 @@ cdef class Taxonomy(OBIWrapper) :
|
||||
yield Taxon(taxon_capsule, self)
|
||||
|
||||
|
||||
cpdef write(self, str prefix) :
|
||||
cpdef write(self, object prefix) :
|
||||
if obi_write_taxonomy(self._dms.pointer(), self.pointer(), tobytes(prefix)) < 0 :
|
||||
raise Exception("Error writing the taxonomy to binary files")
|
||||
|
||||
@ -108,10 +193,7 @@ cdef class Taxonomy(OBIWrapper) :
|
||||
return taxid
|
||||
|
||||
|
||||
def close(self) :
|
||||
|
||||
cdef OBIDMS_taxonomy_p pointer = self.pointer()
|
||||
|
||||
def close(self) :
|
||||
if self.active() :
|
||||
self._dms.unregister(self)
|
||||
OBIWrapper.close(self)
|
||||
@ -124,6 +206,57 @@ cdef class Taxonomy(OBIWrapper) :
|
||||
@property
|
||||
def name(self):
|
||||
return self._name
|
||||
|
||||
|
||||
def parental_tree_iterator(self, int taxid):
    """
    Iterate over the lineage of the taxon identified by `taxid`: the taxon
    itself first, then each successive parent, ending with the taxon whose
    taxid is 1 (presumably the root of the taxonomy -- TODO confirm).

    Because this is a generator, the Exception raised by
    get_taxon_by_taxid() for an unknown taxid surfaces on the first
    iteration step, not when the method is called.
    """
    cdef Taxon taxon

    # The argument is a taxid, not a position in the taxa array, so the
    # lookup must go through get_taxon_by_taxid().
    taxon = self.get_taxon_by_taxid(taxid)

    # Test the current taxon rather than its parent so that the direct
    # child of taxid 1 is yielded as well.
    while taxon.taxid != 1:
        yield taxon
        taxon = taxon.parent

    # Here taxon is the one with taxid 1. The generator then simply ends;
    # raising StopIteration explicitly inside a generator is turned into a
    # RuntimeError by PEP 479.
    yield taxon
|
||||
|
||||
|
||||
def last_common_taxon(self, *taxids):
    """
    Return the taxid of the last taxon shared by the lineages of all the
    given taxids, comparing the lineages from their last element (as
    produced by parental_tree_iterator) backwards.

    Returns None when called without arguments, and the argument itself
    when called with a single taxid. With more than two taxids the result
    is folded pairwise from left to right.
    """
    cdef list lineage_a
    cdef list lineage_b
    cdef Taxon node
    cdef int last_shared
    cdef int ancestor

    if not taxids:
        return None

    if len(taxids) == 1:
        return taxids[0]

    if len(taxids) == 2:
        lineage_a = [node.taxid for node in self.parental_tree_iterator(taxids[0])]
        lineage_b = [node.taxid for node in self.parental_tree_iterator(taxids[1])]
        # Flip both lineages so that they start from the same end and can
        # be compared position by position.
        lineage_a.reverse()
        lineage_b.reverse()

        # Index of the last position at which both lineages still agree
        # (-1 if they already differ at the first position).
        last_shared = -1
        for id_a, id_b in zip(lineage_a, lineage_b):
            if id_a != id_b:
                break
            last_shared += 1

        return lineage_a[last_shared]

    # More than two taxids: reduce them pairwise.
    ancestor = taxids[0]
    for other in taxids[1:]:
        ancestor = self.last_common_taxon(ancestor, other)

    return ancestor
|
||||
|
||||
|
||||
cdef class Taxon : # TODO dict subclass?
|
||||
@ -143,12 +276,12 @@ cdef class Taxon : # TODO dict subclass?
|
||||
(self.farest == taxon2.farest) and \
|
||||
(self.parent.taxid == taxon2.parent.taxid) and \
|
||||
(self.preferred_name == taxon2.preferred_name)
|
||||
|
||||
|
||||
|
||||
|
||||
# name property getter
|
||||
@property
|
||||
def name(self):
|
||||
return bytes2str(self._pointer.name)
|
||||
return self._pointer.name
|
||||
|
||||
# taxid property getter
|
||||
@property
|
||||
@ -158,7 +291,7 @@ cdef class Taxon : # TODO dict subclass?
|
||||
# rank property getter
|
||||
@property
|
||||
def rank(self):
|
||||
return self._pointer.rank
|
||||
return ((self._tax)._ranks)[(self._pointer).rank]
|
||||
|
||||
# farest property getter
|
||||
@property
|
||||
|
Reference in New Issue
Block a user