diff --git a/python/obitools3/obidms/_obidms.cfiles b/python/obitools3/obidms/_obidms.cfiles index b5b7b4f..939bafa 100644 --- a/python/obitools3/obidms/_obidms.cfiles +++ b/python/obitools3/obidms/_obidms.cfiles @@ -20,3 +20,5 @@ ../../../src/encode.c ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidms_taxonomy.c +../../../src/obidms_taxonomy.h diff --git a/python/obitools3/obidms/_obidms.pxd b/python/obitools3/obidms/_obidms.pxd index 92ff1e2..8a4b7cc 100644 --- a/python/obitools3/obidms/_obidms.pxd +++ b/python/obitools3/obidms/_obidms.pxd @@ -4,7 +4,7 @@ from .capi.obidms cimport OBIDMS_p from .capi.obidmscolumn cimport OBIDMS_column_p from .capi.obiview cimport Obiview_p from .capi.obitypes cimport obiversion_t, OBIType_t, index_t - +from ._obitaxo cimport OBI_Taxonomy cdef class OBIDMS_column: @@ -86,6 +86,7 @@ cdef class OBIDMS: cdef str dms_name cpdef close(self) + cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name) cpdef OBIView open_view(self, str view_name) cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*) cpdef dict read_view_infos(self, str view_name) diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx index 9eb4760..d9d8247 100644 --- a/python/obitools3/obidms/_obidms.pyx +++ b/python/obitools3/obidms/_obidms.pyx @@ -26,6 +26,8 @@ from ._obidms cimport OBIDMS, \ OBIView, \ OBIView_line +from ._obitaxo cimport OBI_Taxonomy + from ._obiseq cimport OBI_Nuc_Seq, OBI_Nuc_Seq_Stored from ._obidmscolumn_int cimport OBIDMS_column_int, \ @@ -243,7 +245,8 @@ cdef class OBIView : cdef OBIDMS_column_p* column_pp cdef OBIDMS_column_header_p header cdef index_t* line_selection_p - + cdef object col_capsule + self.dms = dms if line_selection is not None : @@ -649,6 +652,10 @@ cdef class OBIDMS : if (obi_close_dms(self.pointer)) < 0 : raise Exception("Problem closing an OBIDMS") + + cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name) : + return OBI_Taxonomy(self, taxo_name) + cpdef OBIView open_view(self, str view_name) : diff --git a/python/obitools3/obidms/_obitaxo.cfiles b/python/obitools3/obidms/_obitaxo.cfiles new file mode 100644 index 0000000..939bafa --- /dev/null +++ b/python/obitools3/obidms/_obitaxo.cfiles @@ -0,0 +1,24 @@ +../../../src/obidms.h +../../../src/obidms.c +../../../src/obiview.h +../../../src/obiview.c +../../../src/obidmscolumn.h +../../../src/obidmscolumn.c +../../../src/obidmscolumndir.h +../../../src/obidmscolumndir.c +../../../src/obierrno.h +../../../src/obierrno.c +../../../src/obilittlebigman.h +../../../src/obilittlebigman.c +../../../src/obitypes.h +../../../src/obitypes.c +../../../src/private_at_functions.h +../../../src/private_at_functions.c +../../../src/obiavl.h +../../../src/obiavl.c +../../../src/encode.h +../../../src/encode.c +../../../src/obidmscolumn_idx.h +../../../src/obidmscolumn_idx.c +../../../src/obidms_taxonomy.c +../../../src/obidms_taxonomy.h diff --git a/python/obitools3/obidms/_obitaxo.pxd b/python/obitools3/obidms/_obitaxo.pxd new file mode 100644 index 0000000..1d42862 --- /dev/null +++ b/python/obitools3/obidms/_obitaxo.pxd @@ -0,0 +1,31 @@ +#cython: language_level=3 + +from .capi.obitaxonomy cimport ecotx_t, OBIDMS_taxonomy_p + +from libc.stdint cimport int32_t + + +cdef class OBI_Taxonomy : + + cdef str name + cdef OBIDMS_taxonomy_p pointer + + cpdef close(self) + + +cdef class OBI_Taxon : + + cdef ecotx_t* pointer + cdef int32_t taxid + cdef int32_t rank + cdef int32_t farest + cdef ecotx_t* parent + cdef str name + + cpdef int32_t taxid(self) + + cpdef int32_t rank(self) + + cpdef int32_t farest(self) + + cpdef OBI_Taxon parent(self) diff --git a/python/obitools3/obidms/_obitaxo.pyx b/python/obitools3/obidms/_obitaxo.pyx new file mode 100644 index 0000000..7df14d4 --- /dev/null +++ b/python/obitools3/obidms/_obitaxo.pyx @@ -0,0 +1,65 @@ +#cython: language_level=3 + +from obitools3.utils cimport bytes2str, str2bytes + +from .capi.obitaxonomy cimport obi_read_taxonomy, \ + obi_close_taxonomy, \ + obi_taxo_get_taxon_with_taxid + +from ._obidms cimport OBIDMS + +from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer + + +cdef class OBI_Taxonomy : + + def __init__(self, OBIDMS dms, str name) : + + self.name = name + self.pointer = obi_read_taxonomy(dms.pointer, str2bytes(name), True) # TODO discuss + + + def __getitem__(self, object ref): + + cdef ecotx_t* taxon_p + cdef object taxon_capsule + + if type(ref) == int : + taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer, ref) + taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL) + return OBI_Taxon(taxon_capsule) + + + cpdef close(self) : + if (obi_close_taxonomy(self.pointer) < 0) : + raise Exception("Error closing the taxonomy") + + +cdef class OBI_Taxon : # dict subclass? + + def __init__(self, object taxon_capsule) : + + cdef ecotx_t* taxon + + taxon = PyCapsule_GetPointer(taxon_capsule, NULL) + self.pointer = taxon + self.taxid = taxon.taxid + self.rank = taxon.rank + self.farest = taxon.farest + self.parent = taxon.parent + self.name = bytes2str(taxon.name) + + cpdef int32_t taxid(self): + return self.taxid + + cpdef int32_t rank(self): + return self.rank + + cpdef int32_t farest(self): + return self.farest + + cpdef OBI_Taxon parent(self): + cdef object parent_capsule + parent_capsule = PyCapsule_New(self.parent, NULL, NULL) + return OBI_Taxon(parent_capsule) + diff --git a/python/obitools3/obidms/capi/obitaxonomy.pxd b/python/obitools3/obidms/capi/obitaxonomy.pxd new file mode 100644 index 0000000..aad6071 --- /dev/null +++ b/python/obitools3/obidms/capi/obitaxonomy.pxd @@ -0,0 +1,42 @@ +#cython: language_level=3 + +from .obitypes cimport const_char_p +from .obidms cimport OBIDMS_p +from libc.stdint cimport int32_t + + +cdef extern from "obidms_taxonomy.h" nogil: + + struct OBIDMS_taxonomy_t + ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p + + struct ecotxnode : + int32_t taxid + int32_t rank + int32_t farest + ecotxnode* parent + char* name + + ctypedef ecotxnode ecotx_t + + + OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names) + + int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy) + + ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx) + + ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid) + + bint obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid) + + ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) + + ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) + + ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) + + ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) + + ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) + diff --git a/src/obidms.h b/src/obidms.h index b1e0804..2825d14 100644 --- a/src/obidms.h +++ b/src/obidms.h @@ -29,6 +29,8 @@ */ #define AVL_TREES_DIR_NAME "AVL_trees" /**< The name of the AVL trees directory. */ +#define TAXONOMY_DIR_NAME "TAXONOMY" /**< The name of the taxonomy directory. + */ #define MAX_NB_OPENED_COLUMNS (100) /**< The maximum number of columns open at the same time. */ #define MAX_NB_OPENED_AVL_TREES (100) /**< The maximum number of AVL trees open at the same time. diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c new file mode 100644 index 0000000..f280cf2 --- /dev/null +++ b/src/obidms_taxonomy.c @@ -0,0 +1,681 @@ +/******************************************************************** + * OBIDMS taxonomy functions * + ********************************************************************/ + +/** + * @file obidms_taxonomy.c + * @author Celine Mercier (celine.mercier@metabarcoding.org) + * @date March 2nd 2016 + * @brief Functions for reading binary taxonomy files. + */ + + +#include +#include +#include +#include +#include + +#include "obidms_taxonomy.h" +#include "obidms.h" +#include "obidebug.h" +#include "obierrno.h" +#include "private_at_functions.h" + + +#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) + +// TODO : the malloc aren't checked but won't exist for long because mapping instead + + +int compareRankLabel(const void *label1, const void *label2) +{ + return strcmp((const char*)label1,*(const char**)label2); +} + + +int32_t rank_index(const char* label, ecorankidx_t* ranks) +{ + char **rep; + + rep = bsearch(label, ranks->label, ranks->count, sizeof(char*), compareRankLabel); + + if (rep) + return rep-ranks->label; // TODO what??? + + return -1; +} + + +void* read_ecorecord(FILE* f, int32_t* record_size) +{ + static void* buffer = NULL; + int32_t buffer_size = 0; + int32_t read; + + if (!record_size) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError reading a taxonomy file: record_size can not be NULL"); + return NULL; + } + + read = fread(record_size, + 1, + sizeof(int32_t), + f); + + if (feof(f)) + return NULL; + + if (read != sizeof(int32_t)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError reading a taxonomy file: error reading record size"); + return NULL; + } + +// if (is_big_endian()) // TODO +// *recordSize=swap_int32_t(*recordSize); + + if (buffer_size < *record_size) + { + if (buffer) + buffer = realloc(buffer, *record_size); + else + buffer = malloc(*record_size); + if (buffer == NULL) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError reading a taxonomy file: error allocating memory"); + return NULL; + } + } + + read = fread(buffer, + 1, + *record_size, + f); + + if (read != *record_size) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError reading a taxonomy file: error reading a record %d, %d", read, *record_size); + return NULL; + } + + return buffer; +}; + + +ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon) +{ + ecotxformat_t* raw; + int32_t record_length; + + raw = read_ecorecord(f, &record_length); + + if (!raw) + return NULL; + +// if (is_big_endian()) // TODO +// { +// raw->namelength = swap_int32_t(raw->namelength); +// raw->parent = swap_int32_t(raw->parent); +// raw->rank = swap_int32_t(raw->rank); +// raw->taxid = swap_int32_t(raw->taxid); +// } + + taxon->parent = (ecotx_t*) ((size_t) raw->parent); + taxon->taxid = raw->taxid; + taxon->rank = raw->rank; + taxon->farest = -1; + + taxon->name = malloc((raw->name_length+1) * sizeof(char)); + + strncpy(taxon->name, raw->name, raw->name_length); + + return taxon; +} + + +FILE* open_ecorecorddb(const char* file_name, + int32_t* count, + int32_t abort_on_open_error) +{ + FILE* f; + int32_t read; + + fprintf(stderr, "\n%s\n", file_name); + + f = fopen(file_name, "rb"); + + if (!f) + { + if (abort_on_open_error) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nCouldn't open a taxonomy file"); + return NULL; + } + else + { + *count = 0; + return NULL; + } + } + + read = fread(count, + 1, + sizeof(int32_t), + f); + + if (read != sizeof(int32_t)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError reading taxonomy record size"); + return NULL; + } + +// if (!obi_is_little_endian()) // TODO +// *count = swap_int32_t(*count); + + return f; +} + + +ecorankidx_t* read_rankidx(const char* ranks_file_name) +{ + int32_t count; + FILE* ranks_file; + ecorankidx_t* ranks_index; + int32_t i; + int32_t rank_length; + char* buffer; + + ranks_file = open_ecorecorddb(ranks_file_name, &count, 0); + + if (ranks_file==NULL) + return NULL; + + ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * (count-1)); + + ranks_index->count = count; + + for (i=0; i < count; i++) + { + buffer = read_ecorecord(ranks_file, &rank_length); + ranks_index->label[i] = (char*) malloc(rank_length+1); + strncpy(ranks_index->label[i], buffer, rank_length); + } + + return ranks_index; +} + + +ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name) +{ + int32_t count_taxa; + int32_t count_local_taxa; + FILE* f_taxa; + FILE* f_local_taxa; + ecotxidx_t* taxa_index; + struct ecotxnode* t; + int32_t i; + int32_t j; + + f_taxa = open_ecorecorddb(taxa_file_name, &count_taxa,0); + + if (f_taxa == NULL) + { + obidebug(1, "\nError reading taxonomy taxa file"); + return NULL; + } + + f_local_taxa = open_ecorecorddb(local_taxa_file_name, &count_local_taxa, 0); + + taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa - 1)); + + taxa_index->count = count_taxa + count_local_taxa; + taxa_index->buffer_size = taxa_index->count; + + taxa_index->max_taxid = 0; + printf("Reading %d taxa...\n", count_taxa); + for (i=0; itaxon[i])); + taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; + taxa_index->taxon[i].parent->farest = 0; + if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) + taxa_index->max_taxid = taxa_index->taxon[i].taxid; + } + + if (count_local_taxa > 0) + printf("Reading %d local taxa...\n", count_local_taxa); + else + printf("No local taxa\n"); + + count_taxa = taxa_index->count; + + for (; i < count_taxa; i++){ + readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i])); + taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; + taxa_index->taxon[i].parent->farest=0; + if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) + taxa_index->max_taxid = taxa_index->taxon[i].taxid; + } + + printf("Computing longest branches...\n"); + + for (i=0; i < count_taxa; i++) + { + t = taxa_index->taxon+i; + if (t->farest == -1) + { + t->farest=0; + while (t->parent != t) + { + j = t->farest + 1; + if (j > t->parent->farest) + { + t->parent->farest = j; + t=t->parent; + } + else + t = taxa_index->taxon; + } + } + } + + return taxa_index; +} + + +econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy) +{ + + econameformat_t* raw; + int32_t record_length; + + raw = read_ecorecord(f, &record_length); + + if (!raw) + return NULL; + +// if (is_big_endian()) // TODO +// { +// raw->is_scientificname = swap_int32_t(raw->is_scientificname); +// raw->namelength = swap_int32_t(raw->namelength); +// raw->classlength = swap_int32_t(raw->classlength); +// raw->taxid = swap_int32_t(raw->taxid); +// } + + name->is_scientific_name = raw->is_scientific_name; + + name->name = malloc((raw->name_length + 1) * sizeof(char)); + strncpy(name->name, raw->names, raw->name_length); + name->name[raw->name_length] = 0; + + name->class_name = malloc((raw->class_length+1) * sizeof(char)); + strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length); + name->class_name[raw->class_length] = 0; + + name->taxon = taxonomy->taxa->taxon + raw->taxid; + + return name; +} + + +econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy) +{ + int32_t count; + FILE* f; + econameidx_t* index_names; + int32_t i; + + f = open_ecorecorddb(file_name, &count, 0); + + if (f == NULL) + return NULL; + + index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * (count-1)); + + index_names->count = count; + + for (i=0; i < count; i++) + readnext_econame(f, (index_names->names)+i, taxonomy); + + return index_names; +} + + +static int bcomptaxon (const void* ptaxid, const void* ptaxon) +{ + ecotx_t* current_taxon = (ecotx_t*) ptaxon; + int32_t taxid = (int32_t) ((size_t) ptaxid); + return taxid - current_taxon->taxid; +} + + +/////// PUBLIC ///////// + + +OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names) +{ + OBIDMS_taxonomy_p tax; + char* main_taxonomy_dir_path; + char* taxonomy_path; + char* ranks_file_name; + char* taxa_file_name; + char* local_taxa_file_name; + char* alter_names_file_name; + int buffer_size; + + tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t)); + + tax->ranks = NULL; + tax->taxa = NULL; + tax->names = NULL; + + buffer_size = 2048; // TODO + + main_taxonomy_dir_path = get_full_path(dms->dir_fd, TAXONOMY_DIR_NAME); + if (asprintf(&taxonomy_path, "%s/%s/%s", main_taxonomy_dir_path, taxonomy_name, taxonomy_name) < 0) + { + free(main_taxonomy_dir_path); + obi_close_taxonomy(tax); + return NULL; + } + + free(main_taxonomy_dir_path); + + // Read ranks + ranks_file_name = (char*) malloc(buffer_size*sizeof(char)); + if (ranks_file_name == NULL) + { + free(taxonomy_path); + obi_close_taxonomy(tax); + return NULL; + } + if (snprintf(ranks_file_name, buffer_size, "%s.rdx", taxonomy_path) < 0) + { + free(taxonomy_path); + free(ranks_file_name); + obi_close_taxonomy(tax); + return NULL; + } + tax->ranks = read_rankidx(ranks_file_name); + if (tax->ranks == NULL) + { + free(ranks_file_name); + obi_close_taxonomy(tax); + return NULL; + } + free(ranks_file_name); + + // Read taxa + taxa_file_name = (char*) malloc(buffer_size*sizeof(char)); + if (taxa_file_name == NULL) + { + free(taxonomy_path); + obi_close_taxonomy(tax); + return NULL; + } + if (snprintf(taxa_file_name, buffer_size,"%s.tdx", taxonomy_path) < 0) + { + free(taxonomy_path); + free(taxa_file_name); + obi_close_taxonomy(tax); + return NULL; + } + local_taxa_file_name = (char*) malloc(buffer_size*sizeof(char)); + if (local_taxa_file_name == NULL) + { + free(taxonomy_path); + free(taxa_file_name); + obi_close_taxonomy(tax); + return NULL; + } + if (snprintf(local_taxa_file_name, buffer_size,"%s.ldx", taxonomy_path) < 0) + { + free(taxonomy_path); + free(taxa_file_name); + free(local_taxa_file_name); + obi_close_taxonomy(tax); + return NULL; + } + tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name); + if (tax->taxa == NULL) + { + free(taxonomy_path); + free(taxa_file_name); + free(local_taxa_file_name); + obi_close_taxonomy(tax); + return NULL; + } + free(taxa_file_name); + free(local_taxa_file_name); + + // Read alternative names + if (read_alternative_names) + { + alter_names_file_name = (char*) malloc(buffer_size*sizeof(char)); + if (alter_names_file_name == NULL) + { + free(taxonomy_path); + obi_close_taxonomy(tax); + return NULL; + } + if (snprintf(alter_names_file_name, buffer_size,"%s.ndx", taxonomy_path) < 0) + { + free(taxonomy_path); + free(alter_names_file_name); + obi_close_taxonomy(tax); + return NULL; + } + tax->names = read_nameidx(alter_names_file_name, tax); + if (tax->names == NULL) + { + free(alter_names_file_name); + obi_close_taxonomy(tax); + return NULL; + } + free(alter_names_file_name); + } + + free(taxonomy_path); + + return tax; +} + + +int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy) +{ + if (taxonomy) + { + if (taxonomy->ranks) + free(taxonomy->ranks); // TODO those don't free everything but mapping will replace anyway + + if (taxonomy->names) + free(taxonomy->names); + + if (taxonomy->taxa) + free(taxonomy->taxa); + + free(taxonomy); + + return 0; + } + + // TODO no closing files? + + return 1; +} + + +////////////////////////////////////////////////////////////////////////// + + +ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx) +{ + ecotx_t* current_taxon; + ecotx_t* next_taxon; + + current_taxon = taxon; + next_taxon = current_taxon->parent; + + while ((current_taxon != next_taxon) && // root node + (current_taxon->rank != rankidx)) + { + current_taxon = next_taxon; + next_taxon = current_taxon->parent; + } + + if (current_taxon->rank == rankidx) + return current_taxon; + else + return NULL; +} + + +ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid) +{ + ecotx_t *current_taxon; + int32_t count; + + count = taxonomy->taxa->count; + + current_taxon = (ecotx_t*) bsearch((const void *) ((size_t) taxid), + (const void *) taxonomy->taxa->taxon, + count, + sizeof(ecotx_t), + bcomptaxon); + return current_taxon; +} + + +bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid) +{ + ecotx_t* next_parent; + + next_parent = taxon->parent; + + while ((other_taxid != next_parent->taxid) && (strcmp(next_parent->name, "root"))) + next_parent = next_parent->parent; + + if (other_taxid == next_parent->taxid) + return 1; + else + return 0; +} + + +ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) +{ + static OBIDMS_taxonomy_p tax = NULL; + static int32_t rankindex = -1; + + if (taxonomy && (tax != taxonomy)) + { + rankindex = rank_index("species", taxonomy->ranks); + tax = taxonomy; + } + + if (!tax || (rankindex < 0)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError trying to get the species associated with a taxon: No taxonomy defined"); + return NULL; + } + + return obi_taxo_get_parent_at_rank(taxon, rankindex); +} + + +ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) +{ + static OBIDMS_taxonomy_p tax = NULL; + static int32_t rankindex = -1; + + if (taxonomy && (tax != taxonomy)) + { + rankindex = rank_index("genus", taxonomy->ranks); + tax = taxonomy; + } + + if (!tax || (rankindex < 0)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError trying to get the genus associated with a taxon: No taxonomy defined"); + return NULL; + } + + return obi_taxo_get_parent_at_rank(taxon, rankindex); +} + + +ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) +{ + static OBIDMS_taxonomy_p tax = NULL; + static int32_t rankindex = -1; + + if (taxonomy && (tax != taxonomy)) + { + rankindex = rank_index("family", taxonomy->ranks); + tax = taxonomy; + } + + if (!tax || (rankindex < 0)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError trying to get the family associated with a taxon: No taxonomy defined"); + return NULL; + } + + return obi_taxo_get_parent_at_rank(taxon, rankindex); +} + + +ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) +{ + static OBIDMS_taxonomy_p tax = NULL; + static int32_t rankindex = -1; + + if (taxonomy && (tax != taxonomy)) + { + rankindex = rank_index("kingdom", taxonomy->ranks); + tax = taxonomy; + } + + if (!tax || (rankindex < 0)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError trying to get the kingdom associated with a taxon: No taxonomy defined"); + return NULL; + } + + return obi_taxo_get_parent_at_rank(taxon, rankindex); +} + + +ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) +{ + static OBIDMS_taxonomy_p tax = NULL; + static int32_t rankindex = -1; + + if (taxonomy && (tax != taxonomy)) + { + rankindex = rank_index("superkingdom", taxonomy->ranks); + tax = taxonomy; + } + + if (!tax || (rankindex < 0)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError trying to get the superkingdom associated with a taxon: No taxonomy defined"); + return NULL; + } + + return obi_taxo_get_parent_at_rank(taxon, rankindex); +} + diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h new file mode 100644 index 0000000..fd8a510 --- /dev/null +++ b/src/obidms_taxonomy.h @@ -0,0 +1,101 @@ +/******************************************************************** + * OBIDMS taxonomy headeer file * + ********************************************************************/ + +/** + * @file obidms_taxonomy.h + * @author Celine Mercier (celine.mercier@metabarcoding.org) + * @date March 2nd 2016 + * @brief Header file for the functions handling the reading of binary taxonomy files. + */ + + +#include +#include +#include + +#include "obidms.h" + + +typedef struct { + int32_t taxid; + int32_t rank; + int32_t parent; + int32_t name_length; + char name[1]; +} ecotxformat_t; + + +typedef struct ecotxnode { + int32_t taxid; + int32_t rank; + int32_t farest; + struct ecotxnode* parent; + char* name; +} ecotx_t; + + +typedef struct { + int32_t count; + int32_t max_taxid; + int32_t buffer_size; + ecotx_t taxon[1]; +} ecotxidx_t; + + +typedef struct { + int32_t count; + char* label[1]; +} ecorankidx_t; + + +typedef struct { + int32_t is_scientific_name; + int32_t name_length; + int32_t class_length; + int32_t taxid; + char names[1]; +} econameformat_t; + + +typedef struct { + char* name; + char* class_name; + int32_t is_scientific_name; + struct ecotxnode* taxon; +} econame_t; + + +typedef struct { + int32_t count; + econame_t names[1]; +} econameidx_t; + + +typedef struct OBIDMS_taxonomy_t { + ecorankidx_t* ranks; + econameidx_t* names; + ecotxidx_t* taxa; +} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p; + + +OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names); + +int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy); + +ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx); + +ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid); + +bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid); + +ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy); + +ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy); + +ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy); + +ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy); + +ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy); + diff --git a/src/obierrno.h b/src/obierrno.h index fd2da4f..54a9d7b 100644 --- a/src/obierrno.h +++ b/src/obierrno.h @@ -102,6 +102,8 @@ extern int obi_errno; */ #define OBIVIEW_ERROR (21) /** Error while handling an OBIView */ +#define OBI_TAXONOMY_ERROR (22) /** Error while handling binary taxonomy files + */ /**@}*/ #endif /* OBIERRNO_H_ */