taxonomy: expose rank labels, add taxid-based accessors and NULL checks

What this patch does
  * capi/obitaxonomy.pxd: declares ecorankidx_t, exposes
    OBIDMS_taxonomy_t.ranks and obi_taxo_rank_index_to_label().
  * taxo/__init__.py: re-exports Taxonomy and Taxon.
  * taxo/taxo.pxd, taxo/taxo.pyx:
      - Taxonomy._name becomes bytes; Taxonomy._ranks holds the rank labels
        read once when the taxonomy is opened.
      - Taxonomy.open() only calls obi_read_taxonomy(); the taxdump reader
        is now reached through Taxonomy.open_taxdump().
      - new accessors: get_taxon_by_taxid(), get_species(), get_genus(),
        get_family(), get_scientific_name(), get_rank(),
        parental_tree_iterator(), last_common_taxon().
      - Taxon.name returns the raw C string (bytes); Taxon.rank returns the
        rank label instead of the rank index.
  * src/obidms_taxonomy.c/.h: rank_index() is renamed rank_label_to_index();
    NULL-pointer checks are added to the taxon lookup functions;
    obi_taxo_is_taxon_under_taxid() returns int (-1 on error);
    obi_taxo_rank_index_to_label() is added.

Review fixes folded into this revision
  1. obi_taxo_is_taxon_under_taxid(): the NULL check on `taxon` now runs
     before `taxon->parent` is read (it used to run after the dereference).
  2. obi_taxo_rank_index_to_label(): returns NULL for a NULL `ranks` or an
     out-of-range index, as its header documentation already promised.
  3. obi_taxo_get_{species,genus,family,kingdom,superkingdom}(): `rankindex`
     is a plain local; it is recomputed on every call, so `static` only
     added shared mutable state to functions declared `nogil`.
  4. Taxonomy.get_taxon_by_idx(): negative indices are rejected too.
  5. Taxonomy.parental_tree_iterator(): looks the taxon up by taxid (not by
     array index), no longer raises StopIteration inside a generator
     (PEP 479), and no longer skips the direct child of the root.
  6. Taxonomy.open()/open_taxdump(): the void* is cast to
     OBIDMS_taxonomy_p before `.ranks` is accessed.

diff --git a/python/obitools3/dms/capi/obitaxonomy.pxd b/python/obitools3/dms/capi/obitaxonomy.pxd
index d90693c..80b4913 100644
--- a/python/obitools3/dms/capi/obitaxonomy.pxd
+++ b/python/obitools3/dms/capi/obitaxonomy.pxd
@@ -25,8 +25,13 @@ cdef extern from "obidms_taxonomy.h" nogil:
         ecotx_t*  taxon
 
 
+    struct ecorankidx_t :
+        int32_t  count
+        char**   label
+
+
     struct OBIDMS_taxonomy_t :
-#        ecorankidx_t* ranks
+        ecorankidx_t* ranks
 #        econameidx_t* names
         ecotxidx_t*   taxa
 
@@ -63,3 +68,5 @@ cdef extern from "obidms_taxonomy.h" nogil:
 
 
     int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
+    const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks)
+    
\ No newline at end of file
diff --git a/python/obitools3/dms/taxo/__init__.py b/python/obitools3/dms/taxo/__init__.py
index e69de29..e30a82e 100644
--- a/python/obitools3/dms/taxo/__init__.py
+++ b/python/obitools3/dms/taxo/__init__.py
@@ -0,0 +1,2 @@
+from .taxo import Taxonomy    # @UnresolvedImport
+from .taxo import Taxon       # @UnresolvedImport
diff --git a/python/obitools3/dms/taxo/taxo.pxd b/python/obitools3/dms/taxo/taxo.pxd
index 4b67e0f..85c4420 100644
--- a/python/obitools3/dms/taxo/taxo.pxd
+++ b/python/obitools3/dms/taxo/taxo.pxd
@@ -8,15 +8,23 @@ from ..object cimport OBIWrapper
 
 
 cdef class Taxonomy(OBIWrapper) :
-    cdef str _name # TODO keep as bytes?
-    cdef DMS _dms
+    cdef bytes _name
+    cdef DMS   _dms
+    cdef list  _ranks
 
     cdef inline OBIDMS_taxonomy_p pointer(self)
 
-    cpdef get_taxon_by_idx(self, int idx)
-    cpdef write(self, str prefix)
+    cpdef Taxon  get_taxon_by_idx(self, int idx)
+    cpdef Taxon  get_taxon_by_taxid(self, int taxid)
+    cpdef        write(self, object prefix)
     cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=*)
-    
+    cpdef object get_species(self, int taxid)
+    cpdef object get_genus(self, int taxid)
+    cpdef object get_family(self, int taxid)
+    cpdef bytes  get_scientific_name(self, int taxid)
+    cpdef bytes  get_rank(self, int taxid)
+
+
 cdef class Taxon :
-    cdef ecotx_t* _pointer
+    cdef ecotx_t*  _pointer
     cdef Taxonomy  _tax
\ No newline at end of file
diff --git a/python/obitools3/dms/taxo/taxo.pyx b/python/obitools3/dms/taxo/taxo.pyx
index e0b3f0a..b49dff1 100644
--- a/python/obitools3/dms/taxo/taxo.pyx
+++ b/python/obitools3/dms/taxo/taxo.pyx
@@ -1,6 +1,6 @@
 #cython: language_level=3
 
-from obitools3.utils cimport str2bytes, bytes2str, tobytes
+from obitools3.utils cimport str2bytes, bytes2str, tobytes, tostr
 
 from ..capi.obitaxonomy cimport obi_read_taxonomy, \
                                 obi_read_taxdump, \
@@ -9,6 +9,10 @@ from ..capi.obitaxonomy cimport obi_read_taxonomy, \
                                 obi_taxo_get_taxon_with_taxid, \
                                 obi_taxo_add_local_taxon, \
                                 obi_taxo_add_preferred_name_with_taxon, \
+                                obi_taxo_rank_index_to_label, \
+                                obi_taxo_get_species, \
+                                obi_taxo_get_genus, \
+                                obi_taxo_get_family, \
                                 ecotx_t
 
 from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
@@ -22,57 +26,138 @@ cdef class Taxonomy(OBIWrapper) :
 
 
     @staticmethod
-    def open(DMS dms, str name, bint taxdump=False) :
+    def open(DMS dms, object name) :
 
         cdef void* pointer
         cdef Taxonomy taxo
-        
-        if taxdump :
-            pointer = obi_read_taxdump(tobytes(name))
-        else :
-            pointer = obi_read_taxonomy(dms.pointer(), tobytes(name), True)  # TODO discuss
-            # TODO if not found in DMS, try to import?
-        
+
+        pointer = obi_read_taxonomy(dms.pointer(), tobytes(name), True)
         if pointer == NULL :
             raise RuntimeError("Error : Cannot read taxonomy %s"
-                               % name)
+                               % tostr(name))
+
+        taxo = OBIWrapper.new_wrapper(Taxonomy, pointer)
+
+        dms.register(taxo)
+
+        taxo._dms = dms
+        taxo._name = tobytes(name)
+
+        taxo._ranks = []
+        for r in range((<OBIDMS_taxonomy_p>pointer).ranks.count) :
+            taxo._ranks.append(obi_taxo_rank_index_to_label(r, (<OBIDMS_taxonomy_p>pointer).ranks))
+
+        return taxo
+
+
+    @staticmethod
+    def open_taxdump(DMS dms, object name) :
+
+        cdef void* pointer
+        cdef Taxonomy taxo
+
+        pointer = obi_read_taxdump(tobytes(name))
+        if pointer == NULL :
+            raise RuntimeError("Error : Cannot read taxonomy %s"
+                               % tostr(name))
 
         taxo = OBIWrapper.new_wrapper(Taxonomy, pointer)
 
         dms.register(taxo)
 
         taxo._dms = dms
-        taxo._name = name
+        taxo._name = tobytes(name)
+
+        taxo._ranks = []
+        for r in range((<OBIDMS_taxonomy_p>pointer).ranks.count) :
+            taxo._ranks.append(obi_taxo_rank_index_to_label(r, (<OBIDMS_taxonomy_p>pointer).ranks))
 
         return taxo
 
 
-    def __getitem__(self, object ref):
-
-        cdef ecotx_t* taxon_p
-        cdef object taxon_capsule
-
+    def __getitem__(self, object ref):
         if type(ref) == int :
-            taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer(), ref)
-            if taxon_p == NULL :
-                raise Exception("Taxon not found")
-            taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
-            return Taxon(taxon_capsule, self)
+            return self.get_taxon_by_taxid(ref)
         else :
             raise Exception("Not implemented")
 
 
-    cpdef get_taxon_by_idx(self, int idx):
-
+    cpdef Taxon get_taxon_by_taxid(self, int taxid):
+        cdef ecotx_t* taxon_p
+        cdef object taxon_capsule
+        taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
+        if taxon_p == NULL:
+            raise Exception("Error getting a taxon with given taxid", taxid)
+        taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
+        return Taxon(taxon_capsule, self)
+
+
+    cpdef Taxon get_taxon_by_idx(self, int idx):
         cdef ecotx_t* taxa
         cdef ecotx_t* taxon_p
         cdef object taxon_capsule
-
+        if idx < 0 or idx >= self.pointer().taxa.count :
+            raise Exception("Error getting a taxon with given index: no taxid with this index", idx)
         taxa = self.pointer().taxa.taxon
         taxon_p = (taxa+idx)
         taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
         return Taxon(taxon_capsule, self)
 
+
+    cpdef object get_species(self, int taxid):
+        cdef ecotx_t* taxon_p
+        cdef ecotx_t* species_p
+        taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
+        if taxon_p == NULL:
+            raise Exception("Error getting a taxon with given taxid", taxid)
+        species_p = obi_taxo_get_species(taxon_p, self.pointer())
+        if species_p == NULL :
+            return None
+        else :
+            return (species_p.taxid)
+
+
+    cpdef object get_genus(self, int taxid):
+        cdef ecotx_t* taxon_p
+        cdef ecotx_t* genus_p
+        taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
+        if taxon_p == NULL:
+            raise Exception("Error getting a taxon with given taxid", taxid)
+        genus_p = obi_taxo_get_genus(taxon_p, self.pointer())
+        if genus_p == NULL :
+            return None
+        else :
+            return (genus_p.taxid)
+
+
+    cpdef object get_family(self, int taxid):
+        cdef ecotx_t* taxon_p
+        cdef ecotx_t* family_p
+        taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
+        if taxon_p == NULL:
+            raise Exception("Error getting a taxon with given taxid", taxid)
+        family_p = obi_taxo_get_family(taxon_p, self.pointer())
+        if family_p == NULL :
+            return None
+        else :
+            return (family_p.taxid)
+
+
+    cpdef bytes get_scientific_name(self, int taxid):
+        cdef ecotx_t* taxon_p
+        taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
+        if taxon_p == NULL:
+            raise Exception("Error getting a taxon with given taxid", taxid)
+        return taxon_p.name
+
+
+    cpdef bytes get_rank(self, int taxid):
+        cdef ecotx_t* taxon_p
+        taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer(), taxid)
+        if taxon_p == NULL:
+            raise Exception("Error getting a taxon with given taxid", taxid)
+        return self._ranks[taxon_p.rank]
+
 
     def __len__(self):
         return self.pointer().taxa.count
@@ -94,7 +179,7 @@ cdef class Taxonomy(OBIWrapper) :
             yield Taxon(taxon_capsule, self)
 
 
-    cpdef write(self, str prefix) :
+    cpdef write(self, object prefix) :
         if obi_write_taxonomy(self._dms.pointer(), self.pointer(), tobytes(prefix)) < 0 :
             raise Exception("Error writing the taxonomy to binary files")
 
@@ -108,10 +193,7 @@ cdef class Taxonomy(OBIWrapper) :
         return taxid
 
 
-    def close(self) :
-
-        cdef OBIDMS_taxonomy_p pointer = self.pointer()
-
+    def close(self) :
         if self.active() :
             self._dms.unregister(self)
             OBIWrapper.close(self)
@@ -124,6 +206,57 @@ cdef class Taxonomy(OBIWrapper) :
     @property
     def name(self):
         return self._name
+
+
+    def parental_tree_iterator(self, int taxid):
+        """
+           Yield the taxon with the given taxid, then each of its ancestors
+           in turn, ending with the root taxon (taxid 1).
+           Raises an Exception if no taxon has the given taxid.
+        """
+        cdef Taxon taxon
+        # the argument is a taxid, not an index in the taxa array
+        taxon = self.get_taxon_by_taxid(taxid)
+        # test the taxon itself so that the direct child of the root is yielded too
+        while taxon.taxid != 1:
+            yield taxon
+            taxon = taxon.parent
+        yield taxon
+
+
+    def last_common_taxon(self, *taxids):
+
+        cdef list  t1
+        cdef list  t2
+        cdef Taxon x
+        cdef int   count
+        cdef int   i
+        cdef int   ancestor
+
+        if not taxids:
+            return None
+        if len(taxids)==1:
+            return taxids[0]
+
+        if len(taxids)==2:
+            t1 = [x.taxid for x in self.parental_tree_iterator(taxids[0])]
+            t2 = [x.taxid for x in self.parental_tree_iterator(taxids[1])]
+            t1.reverse()
+            t2.reverse()
+
+            count = min(len(t1),len(t2))
+            i=0
+            while(i < count and t1[i]==t2[i]):
+                i+=1
+            i-=1
+
+            return t1[i]
+
+        ancestor = taxids[0]
+        for taxon in taxids[1:]:
+            ancestor = self.last_common_taxon(ancestor, taxon)
+
+        return ancestor
 
 
 cdef class Taxon :  # TODO dict subclass?
@@ -143,12 +276,12 @@ cdef class Taxon :  # TODO dict subclass?
                (self.farest == taxon2.farest) and \
                (self.parent.taxid == taxon2.parent.taxid) and \
                (self.preferred_name == taxon2.preferred_name)
-
-
+    
+    
     # name property getter
     @property
     def name(self):
-        return bytes2str(self._pointer.name)
+        return self._pointer.name
 
     # taxid property getter
     @property
@@ -158,7 +291,7 @@ cdef class Taxon :  # TODO dict subclass?
     # rank property getter
     @property
     def rank(self):
-        return self._pointer.rank
+        return ((self._tax)._ranks)[(self._pointer).rank]
 
     # farest property getter
     @property
diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c
index aba8641..1ea50b7 100644
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@@ -148,7 +148,7 @@ static char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name);
  * @returns The index of a rank in the ecorankidx_t structure.
  * @retval -1 if the rank was not found.
  */
-static int32_t rank_index(const char* label, ecorankidx_t* ranks);
+static int32_t rank_label_to_index(const char* label, ecorankidx_t* ranks);
 
 
 /**
@@ -543,7 +543,7 @@ static char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name)
 }
 
 
-static int32_t rank_index(const char* label, ecorankidx_t* ranks)
+static int32_t rank_label_to_index(const char* label, ecorankidx_t* ranks)
 {
 	char **rep;
 
@@ -3501,6 +3501,13 @@ ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
 	ecotx_t* current_taxon;
 	ecotx_t* next_taxon;
 
+	if (taxon == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError getting the parent of a taxon at a given rank: taxon pointer is NULL");
+		return NULL;
+	}
+
 	current_taxon = taxon;
 	next_taxon = current_taxon->parent;
 
@@ -3524,6 +3531,13 @@ ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid
 	ecomerged_t *indexed_taxon;
 	int32_t     count;
 
+	if (taxonomy == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get a taxon with its taxid: taxonomy pointer is NULL");
+		return NULL;
+	}
+
 	count = (taxonomy->merged_idx)->count;
 
 	indexed_taxon = (ecomerged_t*) bsearch((const void *) ((size_t) taxid),
@@ -3543,12 +3557,19 @@ ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid
 }
 
 
-bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid) // TODO discuss that this doesn't work with deprecated taxids
+int obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid) // TODO discuss that this doesn't work with deprecated taxids
 {
 	ecotx_t* next_parent;
 
+	if (taxon == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError checking if a taxon is under another: taxon pointer is NULL");
+		return -1;
+	}
+
 	next_parent = taxon->parent;
 
 	while ((other_taxid != next_parent->taxid) && (strcmp(next_parent->name, "root")))
 		next_parent = next_parent->parent;
 
@@ -3561,19 +3582,27 @@ bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid) // TODO
 
 ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 {
-	static OBIDMS_taxonomy_p tax = NULL;
-	static int32_t rankindex     = -1;
+	int32_t rankindex = -1;
 
-	if (taxonomy && (tax != taxonomy))
-	{
-		rankindex = rank_index("species", taxonomy->ranks);
-		tax = taxonomy;
-	}
-
-	if (!tax || (rankindex < 0))
+	if (taxonomy == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError trying to get the species associated with a taxon: No taxonomy defined");
+		obidebug(1, "\nError trying to get the species associated with a taxon: taxonomy pointer is NULL");
+		return NULL;
+	}
+
+	if (taxon == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the species associated with a taxon: taxon pointer is NULL");
+		return NULL;
+	}
+
+	rankindex = rank_label_to_index("species", taxonomy->ranks);
+	if (rankindex < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the species associated with a taxon: error getting rank index");
 		return NULL;
 	}
 
@@ -3583,19 +3612,27 @@ ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 
 ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 {
-	static OBIDMS_taxonomy_p tax = NULL;
-	static int32_t rankindex     = -1;
+	int32_t rankindex = -1;
 
-	if (taxonomy && (tax != taxonomy))
-	{
-		rankindex = rank_index("genus", taxonomy->ranks);
-		tax = taxonomy;
-	}
-
-	if (!tax || (rankindex < 0))
+	if (taxonomy == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError trying to get the genus associated with a taxon: No taxonomy defined");
+		obidebug(1, "\nError trying to get the genus associated with a taxon: taxonomy pointer is NULL");
+		return NULL;
+	}
+
+	if (taxon == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the genus associated with a taxon: taxon pointer is NULL");
+		return NULL;
+	}
+
+	rankindex = rank_label_to_index("genus", taxonomy->ranks);
+	if (rankindex < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the genus associated with a taxon: error getting rank index");
 		return NULL;
 	}
 
@@ -3605,19 +3642,27 @@ ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 
 ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 {
-	static OBIDMS_taxonomy_p tax = NULL;
-	static int32_t rankindex     = -1;
+	int32_t rankindex = -1;
 
-	if (taxonomy && (tax != taxonomy))
-	{
-		rankindex = rank_index("family", taxonomy->ranks);
-		tax = taxonomy;
-	}
-
-	if (!tax || (rankindex < 0))
+	if (taxonomy == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError trying to get the family associated with a taxon: No taxonomy defined");
+		obidebug(1, "\nError trying to get the family associated with a taxon: taxonomy pointer is NULL");
+		return NULL;
+	}
+
+	if (taxon == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the family associated with a taxon: taxon pointer is NULL");
+		return NULL;
+	}
+
+	rankindex = rank_label_to_index("family", taxonomy->ranks);
+	if (rankindex < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the family associated with a taxon: error getting rank index");
 		return NULL;
 	}
 
@@ -3627,19 +3672,27 @@ ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 
 ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 {
-	static OBIDMS_taxonomy_p tax = NULL;
-	static int32_t rankindex     = -1;
+	int32_t rankindex = -1;
 
-	if (taxonomy && (tax != taxonomy))
-	{
-		rankindex = rank_index("kingdom", taxonomy->ranks);
-		tax = taxonomy;
-	}
-
-	if (!tax || (rankindex < 0))
+	if (taxonomy == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError trying to get the kingdom associated with a taxon: No taxonomy defined");
+		obidebug(1, "\nError trying to get the kingdom associated with a taxon: taxonomy pointer is NULL");
+		return NULL;
+	}
+
+	if (taxon == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the kingdom associated with a taxon: taxon pointer is NULL");
+		return NULL;
+	}
+
+	rankindex = rank_label_to_index("kingdom", taxonomy->ranks);
+	if (rankindex < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the kingdom associated with a taxon: error getting rank index");
 		return NULL;
 	}
 
@@ -3649,22 +3702,39 @@ ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 
 ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 {
-	static OBIDMS_taxonomy_p tax = NULL;
-	static int32_t rankindex = -1;
+	int32_t rankindex = -1;
 
-	if (taxonomy && (tax != taxonomy))
-	{
-		rankindex = rank_index("superkingdom", taxonomy->ranks);
-		tax = taxonomy;
-	}
-
-	if (!tax || (rankindex < 0))
+	if (taxonomy == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError trying to get the superkingdom associated with a taxon: No taxonomy defined");
+		obidebug(1, "\nError trying to get the superkingdom associated with a taxon: taxonomy pointer is NULL");
+		return NULL;
+	}
+
+	if (taxon == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the superkingdom associated with a taxon: taxon pointer is NULL");
+		return NULL;
+	}
+
+	rankindex = rank_label_to_index("superkingdom", taxonomy->ranks);
+	if (rankindex < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError trying to get the superkingdom associated with a taxon: error getting rank index");
 		return NULL;
 	}
 
 	return obi_taxo_get_parent_at_rank(taxon, rankindex);
 }
 
+
+const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks)
+{
+	if ((ranks == NULL) || (rank_idx < 0) || (rank_idx >= ranks->count))
+		return NULL;	// no rank at that index, as documented in the header
+
+	return (ranks->label)[rank_idx];
+}
+
diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h
index dcce499..6d50ba0 100644
--- a/src/obidms_taxonomy.h
+++ b/src/obidms_taxonomy.h
@@ -308,7 +308,7 @@ int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon
  * @param rankidx The index of the rank wanted.
  *
  * @returns A pointer on the parent taxon at the wanted rank.
- * @retval NULL if no parent taxon was found at the wanted rank.
+ * @retval NULL if no parent taxon was found at the wanted rank or if an error occurred.
  */
 ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
 
@@ -320,7 +320,7 @@ ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
  * @param taxid The taxid of the taxon.
  *
  * @returns A pointer on the wanted taxon.
- * @retval NULL if no taxon was found with the given taxid.
+ * @retval NULL if no taxon was found with the given taxid or if an error occurred.
  *
  * @since January 2017
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
@@ -334,9 +334,12 @@ ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid
  * @param taxon A pointer on the first taxon.
  * @param other_taxid The taxid of the second taxon.
  *
- * @returns A boolean indicating whether the first taxon is under the second taxon in the taxonomy tree.
+ * @returns A value indicating whether the first taxon is under the second taxon in the taxonomy tree.
+ * @retval 0 if the first taxon is not under the second taxon in the taxonomy tree.
+ * @retval 1 if the first taxon is under the second taxon in the taxonomy tree.
+ * @retval -1 if an error occurred.
  */
-bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
+int obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
 
 
 /**
@@ -398,3 +401,16 @@ ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
  */
 ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
 
+
+/**
+ * @brief Function returning the label of a rank in an ecorankidx_t structure.
+ *
+ * @param rank_idx The index of the rank.
+ * @param ranks A pointer on an ecorankidx_t structure.
+ *
+ * @returns The label of a rank in the ecorankidx_t structure.
+ * @retval NULL if there is no rank at that index.
+ *
+ * @see rank_label_to_index()
+ */
+const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks);