Taxonomy handling functions in C. Features: read taxdump, read binary files,
write binary files. Not fully handled yet: *.adx, *.pdx, *.ldx, merged.dmp and
delnodes.dmp files.
This commit is contained in:
Celine Mercier
2016-10-27 18:56:11 +02:00
parent b63d0fb9fb
commit d8a257e711
5 changed files with 970 additions and 88 deletions

View File

@ -11,7 +11,7 @@ cdef class OBI_Taxonomy :
cdef OBIDMS _dms
cpdef close(self)
cpdef _write(self, str prefix)
cpdef write(self, str prefix)
cdef class OBI_Taxon :

View File

@ -3,11 +3,11 @@
from obitools3.utils cimport bytes2str, str2bytes
from .capi.obitaxonomy cimport obi_read_taxonomy, \
obi_read_taxdump, \
obi_write_taxonomy, \
obi_close_taxonomy, \
obi_taxo_get_taxon_with_taxid, \
write_rankidx, \
write_taxonomyidx, \
write_nameidx
obi_taxo_get_taxon_with_taxid
from ._obidms cimport OBIDMS
@ -19,11 +19,14 @@ cdef class OBI_Taxonomy :
# TODO function to import taxonomy?
def __init__(self, OBIDMS dms, str name) :
def __init__(self, OBIDMS dms, str name, bint taxdump=False) :
self._dms = dms
self._name = name
self._pointer = obi_read_taxonomy(dms._pointer, str2bytes(name), True) # TODO discuss
if taxdump :
self._pointer = obi_read_taxdump(str2bytes(name))
else :
self._pointer = obi_read_taxonomy(dms._pointer, str2bytes(name), True) # TODO discuss
# TODO if not found in DMS, try to import?
@ -40,19 +43,31 @@ cdef class OBI_Taxonomy :
raise Exception("Not implemented")
# def __iter__(self):
#
# cdef ecotx_t* taxa
# cdef ecotx_t* taxon_p
# cdef object taxon_capsule
#
# taxa = self._pointer.taxa.taxon
#
# # Yield each taxid
# for t in range(self._pointer.taxa.count):
# taxon_p = taxa+t # TODO not compiling for mysterious cython reasons
# taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
# yield OBI_Taxon(taxon_capsule)
cpdef write(self, str prefix) :
if obi_write_taxonomy(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0 :
raise Exception("Error writing the taxonomy to binary files")
cpdef close(self) :
if (obi_close_taxonomy(self._pointer) < 0) :
raise Exception("Error closing the taxonomy")
cpdef _write(self, str prefix) :
if (write_rankidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) :
raise Exception("Error writing the taxonomy rank file")
if (write_taxonomyidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) :
raise Exception("Error writing the taxonomy taxa file")
if (write_nameidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) :
raise Exception("Error writing the taxonomy taxa file")
cdef class OBI_Taxon : # TODO dict subclass?

View File

@ -7,9 +7,6 @@ from libc.stdint cimport int32_t
cdef extern from "obidms_taxonomy.h" nogil:
struct OBIDMS_taxonomy_t
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
struct ecotxnode :
int32_t taxid
int32_t rank
@ -20,8 +17,27 @@ cdef extern from "obidms_taxonomy.h" nogil:
ctypedef ecotxnode ecotx_t
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
struct ecotxidx_t :
int32_t count
int32_t max_taxid
int32_t buffer_size
ecotx_t* taxon
struct OBIDMS_taxonomy_t :
# ecorankidx_t* ranks
# econameidx_t* names
ecotxidx_t* taxa
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
OBIDMS_taxonomy_p obi_read_taxdump(const_char_p taxdump)
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p tax_name)
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
@ -40,7 +56,3 @@ cdef extern from "obidms_taxonomy.h" nogil:
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)

File diff suppressed because it is too large Load Diff

View File

@ -17,10 +17,6 @@
#include "obidms.h"
#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
(((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
typedef struct {
int32_t taxid;
int32_t rank;
@ -72,8 +68,8 @@ typedef struct {
typedef struct {
int32_t count;
econame_t names[1];
int32_t count;
econame_t names[1];
} econameidx_t;
@ -109,4 +105,7 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);