Taxonomy handling functions in C. Features: read taxdump, read binary
files, write binary files. Not fully handled yet: *.adx, *.pdx, *.ldx, merged.dmp and delnodes.dmp files.
This commit is contained in:
@ -11,7 +11,7 @@ cdef class OBI_Taxonomy :
|
||||
cdef OBIDMS _dms
|
||||
|
||||
cpdef close(self)
|
||||
cpdef _write(self, str prefix)
|
||||
cpdef write(self, str prefix)
|
||||
|
||||
|
||||
cdef class OBI_Taxon :
|
||||
|
@ -3,11 +3,11 @@
|
||||
from obitools3.utils cimport bytes2str, str2bytes
|
||||
|
||||
from .capi.obitaxonomy cimport obi_read_taxonomy, \
|
||||
obi_read_taxdump, \
|
||||
obi_write_taxonomy, \
|
||||
obi_close_taxonomy, \
|
||||
obi_taxo_get_taxon_with_taxid, \
|
||||
write_rankidx, \
|
||||
write_taxonomyidx, \
|
||||
write_nameidx
|
||||
obi_taxo_get_taxon_with_taxid
|
||||
|
||||
|
||||
from ._obidms cimport OBIDMS
|
||||
|
||||
@ -19,11 +19,14 @@ cdef class OBI_Taxonomy :
|
||||
|
||||
# TODO function to import taxonomy?
|
||||
|
||||
def __init__(self, OBIDMS dms, str name) :
|
||||
def __init__(self, OBIDMS dms, str name, bint taxdump=False) :
|
||||
|
||||
self._dms = dms
|
||||
self._name = name
|
||||
self._pointer = obi_read_taxonomy(dms._pointer, str2bytes(name), True) # TODO discuss
|
||||
if taxdump :
|
||||
self._pointer = obi_read_taxdump(str2bytes(name))
|
||||
else :
|
||||
self._pointer = obi_read_taxonomy(dms._pointer, str2bytes(name), True) # TODO discuss
|
||||
# TODO if not found in DMS, try to import?
|
||||
|
||||
|
||||
@ -40,19 +43,31 @@ cdef class OBI_Taxonomy :
|
||||
raise Exception("Not implemented")
|
||||
|
||||
|
||||
# def __iter__(self):
|
||||
#
|
||||
# cdef ecotx_t* taxa
|
||||
# cdef ecotx_t* taxon_p
|
||||
# cdef object taxon_capsule
|
||||
#
|
||||
# taxa = self._pointer.taxa.taxon
|
||||
#
|
||||
# # Yield each taxid
|
||||
# for t in range(self._pointer.taxa.count):
|
||||
# taxon_p = taxa+t # TODO not compiling for mysterious cython reasons
|
||||
# taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||
# yield OBI_Taxon(taxon_capsule)
|
||||
|
||||
|
||||
cpdef write(self, str prefix) :
|
||||
if obi_write_taxonomy(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0 :
|
||||
raise Exception("Error writing the taxonomy to binary files")
|
||||
|
||||
|
||||
cpdef close(self) :
|
||||
if (obi_close_taxonomy(self._pointer) < 0) :
|
||||
raise Exception("Error closing the taxonomy")
|
||||
|
||||
|
||||
cpdef _write(self, str prefix) :
|
||||
if (write_rankidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) :
|
||||
raise Exception("Error writing the taxonomy rank file")
|
||||
if (write_taxonomyidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) :
|
||||
raise Exception("Error writing the taxonomy taxa file")
|
||||
if (write_nameidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) :
|
||||
raise Exception("Error writing the taxonomy taxa file")
|
||||
|
||||
|
||||
cdef class OBI_Taxon : # TODO dict subclass?
|
||||
|
||||
|
@ -7,9 +7,6 @@ from libc.stdint cimport int32_t
|
||||
|
||||
cdef extern from "obidms_taxonomy.h" nogil:
|
||||
|
||||
struct OBIDMS_taxonomy_t
|
||||
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
|
||||
|
||||
struct ecotxnode :
|
||||
int32_t taxid
|
||||
int32_t rank
|
||||
@ -20,8 +17,27 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
ctypedef ecotxnode ecotx_t
|
||||
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
|
||||
struct ecotxidx_t :
|
||||
int32_t count
|
||||
int32_t max_taxid
|
||||
int32_t buffer_size
|
||||
ecotx_t* taxon
|
||||
|
||||
|
||||
struct OBIDMS_taxonomy_t :
|
||||
# ecorankidx_t* ranks
|
||||
# econameidx_t* names
|
||||
ecotxidx_t* taxa
|
||||
|
||||
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
|
||||
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxdump(const_char_p taxdump)
|
||||
|
||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p tax_name)
|
||||
|
||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
|
||||
@ -40,7 +56,3 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
|
||||
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)
|
||||
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)
|
||||
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -17,10 +17,6 @@
|
||||
#include "obidms.h"
|
||||
|
||||
|
||||
#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
|
||||
(((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
|
||||
|
||||
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
int32_t rank;
|
||||
@ -72,8 +68,8 @@ typedef struct {
|
||||
|
||||
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
econame_t names[1];
|
||||
int32_t count;
|
||||
econame_t names[1];
|
||||
} econameidx_t;
|
||||
|
||||
|
||||
@ -109,4 +105,7 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
|
||||
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
|
||||
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
|
||||
|
||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
||||
|
||||
|
Reference in New Issue
Block a user