From b63d0fb9fb075b237ebdd3c7c8dd5560c883afa2 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Fri, 14 Oct 2016 17:03:10 +0200 Subject: [PATCH] Added C functions to write .rdx, .tdx, .ndx binary taxonomy files from a taxonomy C structure --- python/obitools3/obidms/_obitaxo.pxd | 3 + python/obitools3/obidms/_obitaxo.pyx | 21 +- python/obitools3/obidms/capi/obitaxonomy.pxd | 4 + src/obidms.c | 36 +- src/obidms.h | 6 + src/obidms_taxonomy.c | 477 +++++++++++++++++-- src/obidms_taxonomy.h | 13 +- 7 files changed, 516 insertions(+), 44 deletions(-) diff --git a/python/obitools3/obidms/_obitaxo.pxd b/python/obitools3/obidms/_obitaxo.pxd index 7f7ac52..91c3a5b 100644 --- a/python/obitools3/obidms/_obitaxo.pxd +++ b/python/obitools3/obidms/_obitaxo.pxd @@ -1,14 +1,17 @@ #cython: language_level=3 from .capi.obitaxonomy cimport ecotx_t, OBIDMS_taxonomy_p +from ._obidms cimport OBIDMS cdef class OBI_Taxonomy : cdef str _name cdef OBIDMS_taxonomy_p _pointer + cdef OBIDMS _dms cpdef close(self) + cpdef _write(self, str prefix) cdef class OBI_Taxon : diff --git a/python/obitools3/obidms/_obitaxo.pyx b/python/obitools3/obidms/_obitaxo.pyx index 2a074d0..0466892 100644 --- a/python/obitools3/obidms/_obitaxo.pyx +++ b/python/obitools3/obidms/_obitaxo.pyx @@ -4,7 +4,10 @@ from obitools3.utils cimport bytes2str, str2bytes from .capi.obitaxonomy cimport obi_read_taxonomy, \ obi_close_taxonomy, \ - obi_taxo_get_taxon_with_taxid + obi_taxo_get_taxon_with_taxid, \ + write_rankidx, \ + write_taxonomyidx, \ + write_nameidx from ._obidms cimport OBIDMS @@ -18,6 +21,7 @@ cdef class OBI_Taxonomy : def __init__(self, OBIDMS dms, str name) : + self._dms = dms self._name = name self._pointer = obi_read_taxonomy(dms._pointer, str2bytes(name), True) # TODO discuss # TODO if not found in DMS, try to import? @@ -39,7 +43,16 @@ cdef class OBI_Taxonomy : cpdef close(self) : if (obi_close_taxonomy(self._pointer) < 0) : raise Exception("Error closing the taxonomy") - + + + cpdef _write(self, str prefix) : + if (write_rankidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) : + raise Exception("Error writing the taxonomy rank file") + if (write_taxonomyidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) : + raise Exception("Error writing the taxonomy taxa file") + if (write_nameidx(self._dms._pointer, self._pointer, str2bytes(prefix)) < 0) : + raise Exception("Error writing the taxonomy taxa file") + cdef class OBI_Taxon : # TODO dict subclass? @@ -82,6 +95,6 @@ cdef class OBI_Taxon : # TODO dict subclass? d['parent'] = self.parent.taxid d['farest'] = self.farest return str(d) - - + + diff --git a/python/obitools3/obidms/capi/obitaxonomy.pxd b/python/obitools3/obidms/capi/obitaxonomy.pxd index aad6071..aa61332 100644 --- a/python/obitools3/obidms/capi/obitaxonomy.pxd +++ b/python/obitools3/obidms/capi/obitaxonomy.pxd @@ -40,3 +40,7 @@ cdef extern from "obidms_taxonomy.h" nogil: ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) + + int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name) + int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name) + int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name) diff --git a/src/obidms.c b/src/obidms.c index afdfc27..25a1b70 100644 --- a/src/obidms.c +++ b/src/obidms.c @@ -497,6 +497,33 @@ OBIDMS_p obi_open_dms(const char* dms_path) return NULL; } + // Open the taxonomy directory + dms->tax_directory = opendir_in_dms(dms, TAXONOMY_DIR_NAME); + if (dms->tax_directory == NULL) + { + obi_set_errno(OBIDMS_UNKNOWN_ERROR); + obidebug(1, "\nError opening the taxonomy directory"); + closedir(dms->indexer_directory); + closedir(dms->view_directory); + closedir(dms->directory); + free(dms); + return NULL; + } + + // Store the taxonomy directory's file descriptor + dms->tax_dir_fd = dirfd(dms->tax_directory); + if (dms->tax_dir_fd < 0) + { + obi_set_errno(OBIDMS_UNKNOWN_ERROR); + obidebug(1, "\nError getting the file descriptor of the taxonomy directory"); + closedir(dms->indexer_directory); + closedir(dms->tax_directory); + closedir(dms->view_directory); + closedir(dms->directory); + free(dms); + return NULL; + } + // Initialize the list of opened columns dms->opened_columns = (Opened_columns_list_p) malloc(sizeof(Opened_columns_list_t)); (dms->opened_columns)->nb_opened_columns = 0; @@ -536,7 +563,7 @@ int obi_close_dms(OBIDMS_p dms) while ((dms->opened_columns)->nb_opened_columns > 0) obi_close_column(*((dms->opened_columns)->columns)); - // Close dms, and view and indexer directories + // Close dms, and view, indexer and taxonomy directories if (closedir(dms->indexer_directory) < 0) { obi_set_errno(OBI_INDEXER_ERROR); @@ -551,6 +578,13 @@ int obi_close_dms(OBIDMS_p dms) free(dms); return -1; } + if (closedir(dms->tax_directory) < 0) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError closing a taxonomy directory"); + free(dms); + return -1; + } if (closedir(dms->directory) < 0) { obi_set_errno(OBIDMS_MEMORY_ERROR); diff --git a/src/obidms.h b/src/obidms.h index 39c6499..e984397 100644 --- a/src/obidms.h +++ b/src/obidms.h @@ -106,6 +106,12 @@ typedef struct OBIDMS { int view_dir_fd; /**< The file descriptor of the directory entry * usable to refer and scan the view directory. */ + DIR* tax_directory; /**< A directory entry usable to + * refer and scan the taxonomy directory. + */ + int tax_dir_fd; /**< The file descriptor of the directory entry + * usable to refer and scan the taxonomy directory. + */ bool little_endian; /**< Endianness of the database. */ Opened_columns_list_p opened_columns; /**< List of opened columns. diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c index 91249e4..a0cb518 100644 --- a/src/obidms_taxonomy.c +++ b/src/obidms_taxonomy.c @@ -51,6 +51,27 @@ int compareRankLabel(const void *label1, const void *label2) } +char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name) +{ + char* all_tax_dir_path; + char* tax_path; + + all_tax_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME); + tax_path = (char*) malloc((strlen(all_tax_dir_path) + strlen(tax_name) + 2)*sizeof(char)); + if (sprintf(tax_path, "%s/%s", all_tax_dir_path, tax_name) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError building taxonomy path"); + free(all_tax_dir_path); + return NULL; + } + + free(all_tax_dir_path); + + return tax_path; +} + + int32_t rank_index(const char* label, ecorankidx_t* ranks) { char **rep; @@ -58,7 +79,7 @@ int32_t rank_index(const char* label, ecorankidx_t* ranks) rep = bsearch(label, ranks->label, ranks->count, sizeof(char*), compareRankLabel); if (rep) - return rep-ranks->label; // TODO what??? + return rep-ranks->label; return -1; } @@ -93,8 +114,8 @@ void* read_ecorecord(FILE* f, int32_t* record_size) } // if (!(obi_is_little_endian())) // TODO - if (is_big_endian()) - *record_size=swap_int32_t(*record_size); +// if (is_big_endian()) +// *record_size=swap_int32_t(*record_size); if (buffer_size < *record_size) { @@ -137,13 +158,13 @@ ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon) return NULL; // if (!(obi_is_little_endian())) // TODO - if (is_big_endian()) - { - raw->name_length = swap_int32_t(raw->name_length); - raw->parent = swap_int32_t(raw->parent); - raw->rank = swap_int32_t(raw->rank); - raw->taxid = swap_int32_t(raw->taxid); - } +// if (is_big_endian()) +// { +// raw->name_length = swap_int32_t(raw->name_length); +// raw->parent = swap_int32_t(raw->parent); +// raw->rank = swap_int32_t(raw->rank); +// raw->taxid = swap_int32_t(raw->taxid); +// } taxon->parent = (ecotx_t*) ((size_t) raw->parent); taxon->taxid = raw->taxid; @@ -195,8 +216,8 @@ FILE* open_ecorecorddb(const char* file_name, } // if (!(obi_is_little_endian())) // TODO - if (is_big_endian()) - *count = swap_int32_t(*count); +// if (is_big_endian()) +// *count = swap_int32_t(*count); return f; } @@ -225,6 +246,7 @@ ecorankidx_t* read_rankidx(const char* ranks_file_name) buffer = read_ecorecord(ranks_file, &rank_length); ranks_index->label[i] = (char*) malloc(rank_length+1); strncpy(ranks_index->label[i], buffer, rank_length); + (ranks_index->label[i])[rank_length] = 0; } return ranks_index; @@ -277,6 +299,7 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_ for (; i < count_taxa; i++){ readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i])); + taxa_index->taxon[i].idx = i; taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent->farest=0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) @@ -321,13 +344,13 @@ econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy return NULL; // if (!(obi_is_little_endian())) // TODO - if (is_big_endian()) - { - raw->is_scientific_name = swap_int32_t(raw->is_scientific_name); - raw->name_length = swap_int32_t(raw->name_length); - raw->class_length = swap_int32_t(raw->class_length); - raw->taxid = swap_int32_t(raw->taxid); - } +// if (is_big_endian()) +// { +// raw->is_scientific_name = swap_int32_t(raw->is_scientific_name); +// raw->name_length = swap_int32_t(raw->name_length); +// raw->class_length = swap_int32_t(raw->class_length); +// raw->taxid = swap_int32_t(raw->taxid); +// } name->is_scientific_name = raw->is_scientific_name; @@ -382,7 +405,6 @@ static int bcomptaxon (const void* ptaxid, const void* ptaxon) OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names) { OBIDMS_taxonomy_p tax; - char* main_taxonomy_dir_path; char* taxonomy_path; char* ranks_file_name; char* taxa_file_name; @@ -398,16 +420,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo buffer_size = 2048; // TODO - main_taxonomy_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME); - taxonomy_path = (char*) malloc((strlen(main_taxonomy_dir_path) + strlen(taxonomy_name) + strlen(taxonomy_name) + 3)*sizeof(char)); - if (sprintf(taxonomy_path, "%s/%s/%s", main_taxonomy_dir_path, taxonomy_name, taxonomy_name) < 0) - { - free(main_taxonomy_dir_path); - obi_close_taxonomy(tax); - return NULL; - } - - free(main_taxonomy_dir_path); + taxonomy_path = get_taxonomy_path(dms, taxonomy_name); // Read ranks ranks_file_name = (char*) malloc(buffer_size*sizeof(char)); @@ -417,7 +430,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo obi_close_taxonomy(tax); return NULL; } - if (snprintf(ranks_file_name, buffer_size, "%s.rdx", taxonomy_path) < 0) + if (snprintf(ranks_file_name, buffer_size, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0) { free(taxonomy_path); free(ranks_file_name); @@ -441,7 +454,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo obi_close_taxonomy(tax); return NULL; } - if (snprintf(taxa_file_name, buffer_size,"%s.tdx", taxonomy_path) < 0) + if (snprintf(taxa_file_name, buffer_size, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0) { free(taxonomy_path); free(taxa_file_name); @@ -456,7 +469,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo obi_close_taxonomy(tax); return NULL; } - if (snprintf(local_taxa_file_name, buffer_size,"%s.ldx", taxonomy_path) < 0) + if (snprintf(local_taxa_file_name, buffer_size, "%s/%s.ldx", taxonomy_path, taxonomy_name) < 0) { free(taxonomy_path); free(taxa_file_name); @@ -486,7 +499,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo obi_close_taxonomy(tax); return NULL; } - if (snprintf(alter_names_file_name, buffer_size,"%s.ndx", taxonomy_path) < 0) + if (snprintf(alter_names_file_name, buffer_size, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0) { free(taxonomy_path); free(alter_names_file_name); @@ -514,7 +527,7 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy) if (taxonomy) { if (taxonomy->ranks) - free(taxonomy->ranks); // TODO those don't free everything but mapping will replace anyway + free(taxonomy->ranks); // TODO those don't free everything if (taxonomy->names) free(taxonomy->names); @@ -527,7 +540,7 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy) return 0; } - // TODO no closing files? + // close files return 1; } @@ -699,3 +712,395 @@ ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) return obi_taxo_get_parent_at_rank(taxon, rankindex); } + + + +// Functions to write taxonomy structure to binary files + +int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? +{ + int i; + char* file_name; + int file_descriptor; + off_t file_size; + char* taxonomy_path; + int32_t length; + + // Compute file size + file_size = sizeof(int32_t); + for (i=0; i < (tax->ranks)->count; i++) + { + file_size = file_size + sizeof(int32_t); // To store label size + file_size = file_size + strlen(((tax->ranks)->label)[i]); // To store label + } + + // Build the taxonomy directory path + taxonomy_path = get_taxonomy_path(dms, taxonomy_name); + + file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); + if (file_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); + return -1; + } + + // Build the file path + if (sprintf(file_name, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError building a binary taxonomy file name"); + return -1; + } + + free(taxonomy_path); + + // Create file + file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); + if (file_descriptor < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError creating a binary taxonomy file"); + free(file_name); + return -1; + } + + free(file_name); + + // Truncate the file to the right size + if (ftruncate(file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError truncating a binary taxonomy file"); + close(file_descriptor); + return -1; + } + + // Write rank count + if (write(file_descriptor, &((tax->ranks)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + + // Write ranks + for (i=0; i < (tax->ranks)->count; i++) + { + length = strlen(((tax->ranks)->label)[i]); + + // Write rank size + if (write(file_descriptor, &length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write rank label + if (write(file_descriptor, ((tax->ranks)->label)[i], length) < ((ssize_t) length)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + } + + // Close file + if (close(file_descriptor) < 0) + { + obi_set_errno(OBIDMS_UNKNOWN_ERROR); + obidebug(1, "\nError closing a DMS information file"); + return -1; + } + + return 0; +} + + +int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? +{ + int i; + char* file_name; + int file_descriptor; + off_t file_size; + char* taxonomy_path; + int32_t name_length; + int32_t record_size; + + // Compute file size + file_size = sizeof(int32_t); // To store record count + for (i=0; i < (tax->taxa)->count; i++) + { + file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length + file_size = file_size + strlen(tax->taxa->taxon[i].name); // To store name + } + + // Build the taxonomy directory path + taxonomy_path = get_taxonomy_path(dms, taxonomy_name); + + file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); + if (file_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); + return -1; + } + + // Build the file path + if (sprintf(file_name, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError building a binary taxonomy file name"); + return -1; + } + + free(taxonomy_path); + + // Create file + file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); + if (file_descriptor < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError creating a binary taxonomy file"); + free(file_name); + return -1; + } + + free(file_name); + + // Truncate the file to the right size + if (ftruncate(file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError truncating a binary taxonomy file"); + close(file_descriptor); + return -1; + } + + // Write record count + if (write(file_descriptor, &(tax->taxa->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + + // Write records + for (i=0; i < tax->taxa->count; i++) + { + name_length = strlen(tax->taxa->taxon[i].name); + record_size = 4*sizeof(int32_t) + name_length; + + // Write record size + if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write taxid + if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write rank index + if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write parent index + if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write name length + if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write name + if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + } + + // Close file + if (close(file_descriptor) < 0) + { + obi_set_errno(OBIDMS_UNKNOWN_ERROR); + obidebug(1, "\nError closing a DMS information file"); + return -1; + } + + return 0; +} + + +int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? +{ + int i; + char* file_name; + int file_descriptor; + off_t file_size; + char* taxonomy_path; + int32_t name_length; + int32_t class_length; + int32_t record_size; + + // Compute file size + file_size = sizeof(int32_t); // To store record count + for (i=0; i < (tax->names)->count; i++) + { + file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length + file_size = file_size + strlen(tax->names->names[i].name); // To store name + file_size = file_size + strlen(tax->names->names[i].class_name); // To store name + } + + // Build the taxonomy directory path + taxonomy_path = get_taxonomy_path(dms, taxonomy_name); + + file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); + if (file_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); + return -1; + } + + // Build the file path + if (sprintf(file_name, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError building a binary taxonomy file name"); + return -1; + } + + free(taxonomy_path); + + // Create file + file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); + if (file_descriptor < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError creating a binary taxonomy file"); + free(file_name); + return -1; + } + + free(file_name); + + // Truncate the file to the right size + if (ftruncate(file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError truncating a binary taxonomy file"); + close(file_descriptor); + return -1; + } + + // Write record count + if (write(file_descriptor, &(tax->names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + + // Write records + for (i=0; i < tax->names->count; i++) + { + name_length = strlen(tax->names->names[i].name); + class_length = strlen(tax->names->names[i].class_name); + record_size = 4*sizeof(int32_t) + name_length + class_length; + + // Write record size + if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write if the name is a scientific name + if (write(file_descriptor, &(tax->names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write name length + if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write class length + if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write taxid index + if (write(file_descriptor, &(tax->names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write name + if (write(file_descriptor, tax->names->names[i].name, name_length) < ((ssize_t) name_length)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write class + if (write(file_descriptor, tax->names->names[i].class_name, class_length) < ((ssize_t) class_length)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + } + + // Close file + if (close(file_descriptor) < 0) + { + obi_set_errno(OBIDMS_UNKNOWN_ERROR); + obidebug(1, "\nError closing a DMS information file"); + return -1; + } + + return 0; +} + diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h index ba63feb..d0902f8 100644 --- a/src/obidms_taxonomy.h +++ b/src/obidms_taxonomy.h @@ -1,5 +1,5 @@ /******************************************************************** - * OBIDMS taxonomy headeer file * + * OBIDMS taxonomy header file * ********************************************************************/ /** @@ -34,6 +34,7 @@ typedef struct ecotxnode { int32_t taxid; int32_t rank; int32_t farest; + int32_t idx; struct ecotxnode* parent; char* name; } ecotx_t; @@ -54,10 +55,10 @@ typedef struct { typedef struct { - int32_t is_scientific_name; + int32_t is_scientific_name; int32_t name_length; int32_t class_length; - int32_t taxid; + int32_t taxid; // taxid idx char names[1]; } econameformat_t; @@ -103,3 +104,9 @@ ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy); ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy); + +int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); +int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); +int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); + +