From c065c1914ac6cf3d740e8f2d285a019bab37b102 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Mon, 16 Jan 2017 17:28:20 +0100 Subject: [PATCH] Taxonomy: adding, writing and reading preferred names, changed some function names, and fixed a bug with taxa indices not being properly initialized --- python/obitools3/obidms/_obitaxo.pxd | 3 +- python/obitools3/obidms/_obitaxo.pyx | 40 +- python/obitools3/obidms/capi/obitaxonomy.pxd | 10 +- src/obidms_taxonomy.c | 435 +++++++++++++++++-- src/obidms_taxonomy.h | 13 +- 5 files changed, 444 insertions(+), 57 deletions(-) diff --git a/python/obitools3/obidms/_obitaxo.pxd b/python/obitools3/obidms/_obitaxo.pxd index 51c6c0f..671d5e1 100644 --- a/python/obitools3/obidms/_obitaxo.pxd +++ b/python/obitools3/obidms/_obitaxo.pxd @@ -17,4 +17,5 @@ cdef class OBI_Taxonomy : cdef class OBI_Taxon : - cdef ecotx_t* _pointer + cdef ecotx_t* _pointer + cdef OBI_Taxonomy _tax diff --git a/python/obitools3/obidms/_obitaxo.pyx b/python/obitools3/obidms/_obitaxo.pyx index b9d7e98..3aae515 100644 --- a/python/obitools3/obidms/_obitaxo.pyx +++ b/python/obitools3/obidms/_obitaxo.pyx @@ -7,10 +7,10 @@ from .capi.obitaxonomy cimport obi_read_taxonomy, \ obi_write_taxonomy, \ obi_close_taxonomy, \ obi_taxo_get_taxon_with_taxid, \ - obi_taxonomy_add_local_taxon, \ + obi_taxo_add_local_taxon, \ + obi_taxo_add_preferred_name_with_taxon, \ ecotx_t - from ._obidms cimport OBIDMS from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer @@ -42,11 +42,11 @@ cdef class OBI_Taxonomy : if taxon_p == NULL : raise Exception("Taxon not found") taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL) - return OBI_Taxon(taxon_capsule) + return OBI_Taxon(taxon_capsule, self) else : raise Exception("Not implemented") - - + + def __iter__(self): cdef ecotx_t* taxa @@ -60,7 +60,7 @@ cdef class OBI_Taxonomy : for t in range(self._pointer.taxa.count): taxon_p = (taxa+t) taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL) - yield OBI_Taxon(taxon_capsule) + yield OBI_Taxon(taxon_capsule, self) cpdef write(self, str prefix) : @@ -70,7 +70,7 @@ cdef class OBI_Taxonomy : cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) : cdef int taxid - taxid = obi_taxonomy_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid) + taxid = obi_taxo_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid) if taxid < 0 : raise Exception("Error adding a new taxon to the taxonomy") else : @@ -85,10 +85,11 @@ cdef class OBI_Taxonomy : cdef class OBI_Taxon : # TODO dict subclass? - def __init__(self, object taxon_capsule) : + def __init__(self, object taxon_capsule, OBI_Taxonomy tax) : self._pointer = PyCapsule_GetPointer(taxon_capsule, NULL) if self._pointer == NULL : - raise Exception("Error reading the taxonomy") + raise Exception("Error reading a taxon (NULL pointer)") + self._tax = tax # name property getter @property @@ -115,14 +116,25 @@ cdef class OBI_Taxon : # TODO dict subclass? def parent(self): cdef object parent_capsule parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL) - return OBI_Taxon(parent_capsule) + return OBI_Taxon(parent_capsule, self._tax) + + # preferred name property getter and setter + @property + def preferred_name(self): + if self._pointer.preferred_name != NULL : + return bytes2str(self._pointer.preferred_name) + @preferred_name.setter + def preferred_name(self, str new_preferred_name) : # @DuplicatedSignature + if (obi_taxo_add_preferred_name_with_taxon(self._tax._pointer, self._pointer, str2bytes(new_preferred_name)) < 0) : + raise Exception("Error adding a new preferred name to a taxon") def __repr__(self): d = {} - d['taxid'] = self.taxid - d['name'] = self.name - d['parent'] = self.parent.taxid - d['farest'] = self.farest + d['taxid'] = self.taxid + d['name'] = self.name + d['preferred name'] = self.preferred_name + d['parent'] = self.parent.taxid + d['farest'] = self.farest return str(d) diff --git a/python/obitools3/obidms/capi/obitaxonomy.pxd b/python/obitools3/obidms/capi/obitaxonomy.pxd index 99cd7e4..d90693c 100644 --- a/python/obitools3/obidms/capi/obitaxonomy.pxd +++ b/python/obitools3/obidms/capi/obitaxonomy.pxd @@ -13,7 +13,8 @@ cdef extern from "obidms_taxonomy.h" nogil: int32_t farest ecotxnode* parent char* name - + char* preferred_name + ctypedef ecotxnode ecotx_t @@ -56,4 +57,9 @@ cdef extern from "obidms_taxonomy.h" nogil: ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) - int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid) + int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid) + + int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name) + + int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name) + diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c index 6295be8..9e081f0 100644 --- a/src/obidms_taxonomy.c +++ b/src/obidms_taxonomy.c @@ -246,7 +246,7 @@ FILE* open_ecorecorddb(const char* file_name, } -ecorankidx_t* read_rankidx(const char* ranks_file_name) +ecorankidx_t* read_ranks_idx(const char* ranks_file_name) { int32_t count; FILE* ranks_file; @@ -301,7 +301,7 @@ ecorankidx_t* read_rankidx(const char* ranks_file_name) } -ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name) +ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name) { int32_t count_taxa; int32_t count_local_taxa; @@ -341,10 +341,12 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_ for (i=0; itaxon[i])); + taxa_index->taxon[i].idx = i; taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent->farest = 0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) taxa_index->max_taxid = taxa_index->taxon[i].taxid; + taxa_index->taxon[i].preferred_name = NULL; } if (count_local_taxa > 0) @@ -361,6 +363,7 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_ taxa_index->taxon[i].parent->farest=0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) taxa_index->max_taxid = taxa_index->taxon[i].taxid; + taxa_index->taxon[i].preferred_name = NULL; } for (i=0; i < count_taxa; i++) @@ -431,7 +434,60 @@ econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy } -econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy) +econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy) +{ + econameformat_t* raw; + int32_t record_length; + + raw = read_ecorecord(f, &record_length); + if (raw == NULL) + return NULL; + + name->is_scientific_name = raw->is_scientific_name; + + name->name = malloc((raw->name_length + 1) * sizeof(char)); + if (name->name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for a taxon preferred name"); + free(raw); + return NULL; + } + strncpy(name->name, raw->names, raw->name_length); + name->name[raw->name_length] = 0; + + name->class_name = malloc((raw->class_length+1) * sizeof(char)); + if (name->class_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for a taxon class name"); + free(name->name); + free(raw); + return NULL; + } + strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length); + name->class_name[raw->class_length] = 0; + + name->taxon = taxonomy->taxa->taxon + raw->taxid; + + // Add the preferred name in the taxon structure // TODO discuss: couldn't they all use the same pointer? + (taxonomy->taxa->taxon + raw->taxid)->preferred_name = malloc((raw->name_length + 1) * sizeof(char)); + if ((taxonomy->taxa->taxon + raw->taxid)->preferred_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for a taxon preferred name"); + free(name->name); + free(name->class_name); + free(raw); + return NULL; + } + strcpy((taxonomy->taxa->taxon + raw->taxid)->preferred_name, name->name); + + return name; +} + + +econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy) { int32_t count; FILE* f; @@ -440,10 +496,7 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy) f = open_ecorecorddb(file_name, &count, 0); if (f == NULL) - { - obidebug(1, "\nError reading taxonomy name file"); - return NULL; - } + return NULL; index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count); if (index_names == NULL) @@ -473,9 +526,46 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy) } +econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy) +{ + int32_t count; + FILE* f; + econameidx_t* index_names; + int32_t i; + + f = open_ecorecorddb(file_name, &count, 0); + if (f == NULL) + return NULL; + + index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count); + if (index_names == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError reading taxonomy name file"); + return NULL; + } + + index_names->count = count; + + for (i=0; i < count; i++) + { + readnext_ecopreferredname(f, (index_names->names)+i, taxonomy); + if ((index_names->names)+i == NULL) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError reading taxonomy name file"); + free(index_names); + return NULL; + } + } + + fclose(f); + + return index_names; +} -ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy) +ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy) { int32_t count; FILE* f; @@ -528,7 +618,7 @@ ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy // Functions to write taxonomy structure to binary files -int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? +int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? { int i; char* file_name; @@ -631,7 +721,7 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name } -int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? +int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; @@ -905,7 +995,7 @@ int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* ta } -int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? +int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; @@ -1053,7 +1143,155 @@ int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name } -int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? +int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? +{ + int i; + char* file_name; + int file_descriptor; + off_t file_size; + char* taxonomy_path; + int32_t name_length; + int32_t class_length; + int32_t record_size; + + // Compute file size + file_size = sizeof(int32_t); // To store record count + for (i=0; i < (tax->preferred_names)->count; i++) + { + file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length + file_size = file_size + strlen(tax->preferred_names->names[i].name); // To store name + file_size = file_size + strlen(tax->preferred_names->names[i].class_name); // To store name + } + + // Build the taxonomy directory path + taxonomy_path = get_taxonomy_path(dms, taxonomy_name); + + file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char)); + if (file_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); + return -1; + } + + // Build the file path + if (sprintf(file_name, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError building a binary taxonomy file name"); + return -1; + } + + free(taxonomy_path); + + // Create file + file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); + if (file_descriptor < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError creating a binary taxonomy file"); + free(file_name); + return -1; + } + + free(file_name); + + // Truncate the file to the right size + if (ftruncate(file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError truncating a binary taxonomy file"); + close(file_descriptor); + return -1; + } + + // Write record count + if (write(file_descriptor, &(tax->preferred_names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + + // Write records + for (i=0; i < tax->preferred_names->count; i++) + { + name_length = strlen(tax->preferred_names->names[i].name); + class_length = strlen(tax->preferred_names->names[i].class_name); + record_size = 4*sizeof(int32_t) + name_length + class_length; + + // Write record size + if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write if the name is a scientific name + if (write(file_descriptor, &(tax->preferred_names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write name length + if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write class length + if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write taxid index + if (write(file_descriptor, &(tax->preferred_names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write name + if (write(file_descriptor, tax->preferred_names->names[i].name, name_length) < ((ssize_t) name_length)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + // Write class + if (write(file_descriptor, tax->preferred_names->names[i].class_name, class_length) < ((ssize_t) class_length)) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError writing in a binary taxonomy file"); + close(file_descriptor); + return -1; + } + } + + // Close file + if (close(file_descriptor) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError closing a pdx taxonomy file"); + return -1; + } + + return 0; +} + + +int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? { int i; char* file_name; @@ -1182,19 +1420,22 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name free(taxonomy_path); - if (write_rankidx(dms, tax, tax_name) < 0) - return -1; - if (write_taxonomyidx(dms, tax, tax_name) < 0) - return -1; - if (write_nameidx(dms, tax, tax_name) < 0) - return -1; - if (write_mergedidx(dms, tax, tax_name) < 0) - return -1; - // Check if there are local taxa (if so last taxon is local) - if ((tax->taxa)->local_count > 0) - if (write_local_taxonomy_idx(dms, tax, tax_name) < 0) - return -1; - + if (write_ranks_idx(dms, tax, tax_name) < 0) + return -1; + if (write_taxonomy_idx(dms, tax, tax_name) < 0) + return -1; + if (write_names_idx(dms, tax, tax_name) < 0) + return -1; + if (write_merged_idx(dms, tax, tax_name) < 0) + return -1; + // Check if there are local taxa (if so last taxon is local) + if ((tax->taxa)->local_count > 0) + if (write_local_taxonomy_idx(dms, tax, tax_name) < 0) + return -1; + // Write preferred names if there are some + if (tax->preferred_names != NULL) + if (write_preferred_names_idx(dms, tax, tax_name) < 0) + return -1; return 0; } @@ -2114,10 +2355,11 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump) obidebug(1, "\nError allocating the memory for a taxonomy structure"); return NULL; } - tax->ranks = NULL; - tax->taxa = NULL; - tax->names = NULL; - tax->merged_idx = NULL; + tax->ranks = NULL; + tax->taxa = NULL; + tax->names = NULL; + tax->preferred_names = NULL; + tax->merged_idx = NULL; tax->dms = NULL; (tax->tax_name)[0] = '\0'; @@ -2295,6 +2537,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump) (((tax->taxa)->taxon)[i].parent)->farest = 0; } + // Initialize preferred names to NULL + for (i=0; i < (tax->taxa)->count; i++) + ((tax->taxa)->taxon)[i].preferred_name = NULL; + (tax->taxa)->buffer_size = (tax->taxa)->count; // Compute longest branches (used to compute distances between taxa faster) @@ -2328,7 +2574,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump) } -int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid) +int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid) { int32_t taxid; ecotx_t* taxon; @@ -2436,6 +2682,81 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const } +int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name) +{ + ecotx_t* taxon; + + taxon = obi_taxo_get_taxon_with_taxid(tax, taxid); + + return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name); +} + + +int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name) +{ + econame_t* name_struct; + + // Free previous preferred name if there is one + if (taxon->preferred_name != NULL) + free(taxon->preferred_name); + + taxon->preferred_name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char)); + if (taxon->preferred_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for a new preferred name for a taxon"); + return -1; + } + strcpy(taxon->preferred_name, preferred_name); + + // Add new name in preferred names structure + // Allocate or reallocate memory for new name + if (tax->preferred_names == NULL) + { + tax->preferred_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t)); + (tax->preferred_names)->count = 0; + } + else + tax->preferred_names = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->preferred_names)->count + 1)); + if (tax->preferred_names == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name"); + return -1; + } + + // Add new preferred name + name_struct = (tax->preferred_names)->names + ((tax->preferred_names)->count); + name_struct->name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char)); + if (name_struct->name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for a new taxon preferred name"); + return -1; + } + strcpy(name_struct->name, preferred_name); + + name_struct->class_name = (char*) malloc((strlen("preferred name") + 1) * sizeof(char)); + if (name_struct->class_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name"); + return -1; + } + strcpy(name_struct->class_name, "preferred name"); + name_struct->is_scientific_name = false; + name_struct->taxon = taxon; + + // Sort preferred names in alphabetical order + qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names); + + // Update preferred name count + ((tax->preferred_names)->count)++; + + return 0; +} + + /////// PUBLIC ///////// @@ -2448,6 +2769,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo char* merged_idx_file_name; char* local_taxa_file_name; char* alter_names_file_name; + char* pref_names_file_name; int buffer_size; tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t)); @@ -2458,10 +2780,11 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo return NULL; } - tax->ranks = NULL; - tax->taxa = NULL; - tax->names = NULL; - tax->merged_idx = NULL; + tax->ranks = NULL; + tax->taxa = NULL; + tax->names = NULL; + tax->preferred_names = NULL; + tax->merged_idx = NULL; tax->dms = dms; @@ -2492,7 +2815,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo free(tax); return NULL; } - tax->ranks = read_rankidx(ranks_file_name); + tax->ranks = read_ranks_idx(ranks_file_name); if (tax->ranks == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); @@ -2543,7 +2866,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo obi_close_taxonomy(tax); return NULL; } - tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name); + tax->taxa = read_taxonomy_idx(taxa_file_name, local_taxa_file_name); if (tax->taxa == NULL) { free(taxonomy_path); @@ -2574,7 +2897,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo obi_close_taxonomy(tax); return NULL; } - tax->merged_idx = read_mergedidx(merged_idx_file_name, tax); + tax->merged_idx = read_merged_idx(merged_idx_file_name, tax); if (tax->merged_idx == NULL) { free(taxonomy_path); @@ -2584,6 +2907,38 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo } free(merged_idx_file_name); + // Read preferred names + pref_names_file_name = (char*) malloc(buffer_size*sizeof(char)); + if (pref_names_file_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for alternative names file name"); + free(taxonomy_path); + obi_close_taxonomy(tax); + return NULL; + } + if (snprintf(pref_names_file_name, buffer_size, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0) + { + obi_set_errno(OBI_TAXONOMY_ERROR); + obidebug(1, "\nError building alternative names file name"); + free(taxonomy_path); + free(pref_names_file_name); + obi_close_taxonomy(tax); + return NULL; + } + tax->preferred_names = read_preferred_names_idx(pref_names_file_name, tax); + if (obi_errno) + { + free(taxonomy_path); + free(pref_names_file_name); + obi_close_taxonomy(tax); + return NULL; + } + free(pref_names_file_name); + + if (tax->preferred_names != NULL) + fprintf(stderr, "\nPreferred names read"); + // Read alternative names if (read_alternative_names) { @@ -2605,7 +2960,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo obi_close_taxonomy(tax); return NULL; } - tax->names = read_nameidx(alter_names_file_name, tax); + tax->names = read_names_idx(alter_names_file_name, tax); if (tax->names == NULL) { free(taxonomy_path); @@ -2637,6 +2992,10 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy) if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0) return -1; } + // Write preferred names if there are some + if (taxonomy->preferred_names != NULL) + if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0) + return -1; if (taxonomy) { diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h index 33d8aba..fa2f511 100644 --- a/src/obidms_taxonomy.h +++ b/src/obidms_taxonomy.h @@ -36,7 +36,8 @@ typedef struct ecotxnode { int32_t farest; int32_t idx; struct ecotxnode* parent; - char* name; + char* name; // scientific name + char* preferred_name; // preferred name bool local; } ecotx_t; @@ -98,6 +99,7 @@ typedef struct OBIDMS_taxonomy_t { ecomergedidx_t* merged_idx; ecorankidx_t* ranks; econameidx_t* names; + econameidx_t* preferred_names; ecotxidx_t* taxa; } OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p; @@ -127,4 +129,11 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump); -int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid); +int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid); + +int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name); + +int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name); + + +