Taxonomy: adding, writing and reading preferred names, changed some

function names, and fixed a bug with taxa indices not being properly
initialized
This commit is contained in:
Celine Mercier
2017-01-16 17:28:20 +01:00
parent 0385a92e02
commit c065c1914a
5 changed files with 444 additions and 57 deletions

View File

@ -18,3 +18,4 @@ cdef class OBI_Taxonomy :
cdef class OBI_Taxon :
cdef ecotx_t* _pointer
cdef OBI_Taxonomy _tax

View File

@ -7,10 +7,10 @@ from .capi.obitaxonomy cimport obi_read_taxonomy, \
obi_write_taxonomy, \
obi_close_taxonomy, \
obi_taxo_get_taxon_with_taxid, \
obi_taxonomy_add_local_taxon, \
obi_taxo_add_local_taxon, \
obi_taxo_add_preferred_name_with_taxon, \
ecotx_t
from ._obidms cimport OBIDMS
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
@ -42,7 +42,7 @@ cdef class OBI_Taxonomy :
if taxon_p == NULL :
raise Exception("Taxon not found")
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
return OBI_Taxon(taxon_capsule)
return OBI_Taxon(taxon_capsule, self)
else :
raise Exception("Not implemented")
@ -60,7 +60,7 @@ cdef class OBI_Taxonomy :
for t in range(self._pointer.taxa.count):
taxon_p = <ecotx_t*> (taxa+t)
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
yield OBI_Taxon(taxon_capsule)
yield OBI_Taxon(taxon_capsule, self)
cpdef write(self, str prefix) :
@ -70,7 +70,7 @@ cdef class OBI_Taxonomy :
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
cdef int taxid
taxid = obi_taxonomy_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
taxid = obi_taxo_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
if taxid < 0 :
raise Exception("Error adding a new taxon to the taxonomy")
else :
@ -85,10 +85,11 @@ cdef class OBI_Taxonomy :
cdef class OBI_Taxon : # TODO dict subclass?
def __init__(self, object taxon_capsule) :
def __init__(self, object taxon_capsule, OBI_Taxonomy tax) :
self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
if self._pointer == NULL :
raise Exception("Error reading the taxonomy")
raise Exception("Error reading a taxon (NULL pointer)")
self._tax = tax
# name property getter
@property
@ -115,12 +116,23 @@ cdef class OBI_Taxon : # TODO dict subclass?
def parent(self):
cdef object parent_capsule
parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
return OBI_Taxon(parent_capsule)
return OBI_Taxon(parent_capsule, self._tax)
# preferred name property getter and setter
# Getter: returns the taxon's preferred name converted with bytes2str().
# When the C field is NULL (no preferred name set) no return statement is
# reached, so the property evaluates to None.
@property
def preferred_name(self):
if self._pointer.preferred_name != NULL :
return bytes2str(self._pointer.preferred_name)
# Setter: registers new_preferred_name for this taxon through
# obi_taxo_add_preferred_name_with_taxon(), using the owning taxonomy kept in
# self._tax. Raises Exception when the C function returns a negative value.
@preferred_name.setter
def preferred_name(self, str new_preferred_name) : # @DuplicatedSignature
if (obi_taxo_add_preferred_name_with_taxon(self._tax._pointer, self._pointer, str2bytes(new_preferred_name)) < 0) :
raise Exception("Error adding a new preferred name to a taxon")
# Builds a dict from the taxon's taxid, name, preferred name, parent taxid and
# 'farest' properties and returns its str() form.
def __repr__(self):
d = {}
d['taxid'] = self.taxid
d['name'] = self.name
# None when the taxon has no preferred name (see the preferred_name getter)
d['preferred name'] = self.preferred_name
# Only the taxid of the parent taxon is shown, not the parent object itself
d['parent'] = self.parent.taxid
d['farest'] = self.farest
return str(d)

View File

@ -13,6 +13,7 @@ cdef extern from "obidms_taxonomy.h" nogil:
int32_t farest
ecotxnode* parent
char* name
char* preferred_name
ctypedef ecotxnode ecotx_t
@ -56,4 +57,9 @@ cdef extern from "obidms_taxonomy.h" nogil:
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)

View File

@ -246,7 +246,7 @@ FILE* open_ecorecorddb(const char* file_name,
}
ecorankidx_t* read_rankidx(const char* ranks_file_name)
ecorankidx_t* read_ranks_idx(const char* ranks_file_name)
{
int32_t count;
FILE* ranks_file;
@ -301,7 +301,7 @@ ecorankidx_t* read_rankidx(const char* ranks_file_name)
}
ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name)
ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name)
{
int32_t count_taxa;
int32_t count_local_taxa;
@ -341,10 +341,12 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
for (i=0; i<count_taxa; i++)
{
readnext_ecotaxon(f_taxa, &(taxa_index->taxon[i]));
taxa_index->taxon[i].idx = i;
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
taxa_index->taxon[i].parent->farest = 0;
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
taxa_index->taxon[i].preferred_name = NULL;
}
if (count_local_taxa > 0)
@ -361,6 +363,7 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
taxa_index->taxon[i].parent->farest=0;
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
taxa_index->taxon[i].preferred_name = NULL;
}
for (i=0; i < count_taxa; i++)
@ -431,7 +434,60 @@ econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy
}
econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
/**
 * Reads the next preferred name record of a binary preferred names file,
 * fills `name` with it, and stores a copy of the name in the taxon that the
 * record refers to.
 *
 * @param f        File to read the record from (read with read_ecorecord()).
 * @param name     Name structure to fill. Its `name` and `class_name` fields
 *                 are newly allocated and owned by the caller on success.
 * @param taxonomy Taxonomy whose taxa array is indexed by the taxon index
 *                 stored in the record.
 *
 * @returns `name` on success, or NULL if no record could be read, if the
 *          record refers to a taxon index outside of the taxa array, or if
 *          a memory allocation failed.
 */
econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
{
	econameformat_t* raw;
	ecotx_t*         taxon;
	char*            taxon_name_copy;
	int32_t          record_length;

	raw = read_ecorecord(f, &record_length);
	if (raw == NULL)
		return NULL;

	// The taxon index comes from the file: check it before using it to
	// index the taxa array.
	// NOTE(review): raw is not released here, like on the success path which
	// leaves the record buffer alone; the allocation failure paths below do
	// free(raw) as before. Confirm who owns the buffer returned by
	// read_ecorecord().
	if ((raw->taxid < 0) || (raw->taxid >= (taxonomy->taxa)->count))
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError reading a taxon preferred name: taxon index out of range");
		return NULL;
	}
	taxon = (taxonomy->taxa)->taxon + raw->taxid;

	name->is_scientific_name = raw->is_scientific_name;

	name->name = malloc((raw->name_length + 1) * sizeof(char));
	if (name->name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a taxon preferred name");
		free(raw);
		return NULL;
	}
	strncpy(name->name, raw->names, raw->name_length);
	name->name[raw->name_length] = 0;

	// The class name is stored right after the name in the record
	name->class_name = malloc((raw->class_length+1) * sizeof(char));
	if (name->class_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a taxon class name");
		free(name->name);
		free(raw);
		return NULL;
	}
	strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length);
	name->class_name[raw->class_length] = 0;

	name->taxon = taxon;

	// Add the preferred name in the taxon structure  // TODO discuss: couldn't they all use the same pointer?
	// The copy is allocated before touching the taxon so that a failure
	// leaves the taxon unchanged.
	taxon_name_copy = malloc((raw->name_length + 1) * sizeof(char));
	if (taxon_name_copy == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a taxon preferred name");
		free(name->name);
		free(name->class_name);
		free(raw);
		return NULL;
	}
	strcpy(taxon_name_copy, name->name);

	// Release a name set by an earlier record for the same taxon
	// (free(NULL) is a no-op when there was none)
	free(taxon->preferred_name);
	taxon->preferred_name = taxon_name_copy;

	return name;
}
econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
int32_t count;
FILE* f;
@ -440,10 +496,7 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
f = open_ecorecorddb(file_name, &count, 0);
if (f == NULL)
{
obidebug(1, "\nError reading taxonomy name file");
return NULL;
}
index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
if (index_names == NULL)
@ -473,9 +526,46 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
}
/**
 * Reads a binary preferred names file into a newly allocated names index.
 * Each record is read with readnext_ecopreferredname(), which also stores
 * the preferred name in the corresponding taxon of `taxonomy`.
 *
 * @param file_name Path of the preferred names file.
 * @param taxonomy  Taxonomy the preferred names refer to.
 *
 * @returns The names index, or NULL if the file could not be opened by
 *          open_ecorecorddb() or if an error occurred (obi_errno is set by
 *          this function for the allocation, record count and record
 *          reading errors).
 */
econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
	int32_t       count;
	FILE*         f;
	econameidx_t* index_names;
	int32_t       i;
	int32_t       j;

	f = open_ecorecorddb(file_name, &count, 0);
	if (f == NULL)
		return NULL;

	// The count comes from the file: a negative value would turn into a
	// huge allocation size
	if (count < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError reading taxonomy name file");
		fclose(f);
		return NULL;
	}

	index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
	if (index_names == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError reading taxonomy name file");
		fclose(f);
		return NULL;
	}

	index_names->count = count;

	for (i=0; i < count; i++)
	{
		// The return value is what signals a failure: the address of the
		// i-th slot itself can never be NULL
		if (readnext_ecopreferredname(f, (index_names->names)+i, taxonomy) == NULL)
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError reading taxonomy name file");
			// Release the strings of the records that were read successfully
			for (j=0; j < i; j++)
			{
				free(index_names->names[j].name);
				free(index_names->names[j].class_name);
			}
			free(index_names);
			fclose(f);
			return NULL;
		}
	}

	fclose(f);

	return index_names;
}
ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
int32_t count;
FILE* f;
@ -528,7 +618,7 @@ ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy
// Functions to write taxonomy structure to binary files
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
{
int i;
char* file_name;
@ -631,7 +721,7 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
}
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
int i;
char* file_name;
@ -905,7 +995,7 @@ int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* ta
}
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
int i;
char* file_name;
@ -1053,7 +1143,155 @@ int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
}
int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
/**
 * Writes the preferred names of a taxonomy to the binary file
 * "<taxonomy path>/<taxonomy_name>.pdx".
 *
 * File layout, as written below: an int32_t record count, then for each
 * preferred name: the record size (not counting the size field itself),
 * the is_scientific_name value, the name length, the class name length,
 * the index of the taxon, the name and the class name (both without
 * terminating null character).
 *
 * @param dms           DMS used to build the taxonomy directory path.
 * @param tax           Taxonomy whose `preferred_names` index is written
 *                      (must not be NULL).
 * @param taxonomy_name Name of the taxonomy, used as file name prefix.
 *
 * @returns 0 on success, -1 if an error occurred.
 */
int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
	int      i;
	char*    file_name;
	int      file_descriptor;
	off_t    file_size;
	char*    taxonomy_path;
	int32_t  name_length;
	int32_t  class_length;
	int32_t  record_size;

	// Compute file size
	file_size = sizeof(int32_t);	// To store record count
	for (i=0; i < (tax->preferred_names)->count; i++)
	{
		file_size = file_size + sizeof(int32_t) * 5;  								// To store record size, is_scientific_name, name length, class length and taxon index
		file_size = file_size + strlen(tax->preferred_names->names[i].name);      	// To store name
		file_size = file_size + strlen(tax->preferred_names->names[i].class_name);  // To store class name
	}

	// Build the taxonomy directory path
	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
	if (taxonomy_path == NULL)	// presumably how get_taxonomy_path() reports a failure; avoids strlen(NULL) below
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError building a binary taxonomy file name");
		return -1;
	}

	// 6 extra characters: '/', ".pdx" and the terminating null character
	file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
		free(taxonomy_path);
		return -1;
	}

	// Build the file path
	if (sprintf(file_name, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError building a binary taxonomy file name");
		free(taxonomy_path);
		free(file_name);
		return -1;
	}

	free(taxonomy_path);

	// Create file
	// NOTE(review): with O_EXCL this fails if the .pdx file already exists;
	// confirm that this function is never used to rewrite the preferred
	// names of a taxonomy that was read from an existing .pdx file.
	file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
	if (file_descriptor < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError creating a binary taxonomy file");
		free(file_name);
		return -1;
	}

	free(file_name);

	// Truncate the file to the right size
	if (ftruncate(file_descriptor, file_size) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError truncating a binary taxonomy file");
		close(file_descriptor);
		return -1;
	}

	// Write record count
	if (write(file_descriptor, &(tax->preferred_names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError writing in a binary taxonomy file");
		close(file_descriptor);
		return -1;
	}

	// Write records
	for (i=0; i < tax->preferred_names->count; i++)
	{
		name_length = strlen(tax->preferred_names->names[i].name);
		class_length = strlen(tax->preferred_names->names[i].class_name);
		// The record size covers the 4 int32_t fields following it plus both strings
		record_size = 4*sizeof(int32_t) + name_length + class_length;

		// Write record size
		if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError writing in a binary taxonomy file");
			close(file_descriptor);
			return -1;
		}
		// Write if the name is a scientific name
		if (write(file_descriptor, &(tax->preferred_names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError writing in a binary taxonomy file");
			close(file_descriptor);
			return -1;
		}
		// Write name length
		if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError writing in a binary taxonomy file");
			close(file_descriptor);
			return -1;
		}
		// Write class length
		if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError writing in a binary taxonomy file");
			close(file_descriptor);
			return -1;
		}
		// Write taxid index
		if (write(file_descriptor, &(tax->preferred_names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError writing in a binary taxonomy file");
			close(file_descriptor);
			return -1;
		}
		// Write name
		if (write(file_descriptor, tax->preferred_names->names[i].name, name_length) < ((ssize_t) name_length))
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError writing in a binary taxonomy file");
			close(file_descriptor);
			return -1;
		}
		// Write class
		if (write(file_descriptor, tax->preferred_names->names[i].class_name, class_length) < ((ssize_t) class_length))
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError writing in a binary taxonomy file");
			close(file_descriptor);
			return -1;
		}
	}

	// Close file
	if (close(file_descriptor) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError closing a pdx taxonomy file");
		return -1;
	}

	return 0;
}
int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
{
int i;
char* file_name;
@ -1182,19 +1420,22 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
free(taxonomy_path);
if (write_rankidx(dms, tax, tax_name) < 0)
if (write_ranks_idx(dms, tax, tax_name) < 0)
return -1;
if (write_taxonomyidx(dms, tax, tax_name) < 0)
if (write_taxonomy_idx(dms, tax, tax_name) < 0)
return -1;
if (write_nameidx(dms, tax, tax_name) < 0)
if (write_names_idx(dms, tax, tax_name) < 0)
return -1;
if (write_mergedidx(dms, tax, tax_name) < 0)
if (write_merged_idx(dms, tax, tax_name) < 0)
return -1;
// Check if there are local taxa (if so last taxon is local)
if ((tax->taxa)->local_count > 0)
if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
return -1;
// Write preferred names if there are some
if (tax->preferred_names != NULL)
if (write_preferred_names_idx(dms, tax, tax_name) < 0)
return -1;
return 0;
}
@ -2117,6 +2358,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
tax->ranks = NULL;
tax->taxa = NULL;
tax->names = NULL;
tax->preferred_names = NULL;
tax->merged_idx = NULL;
tax->dms = NULL;
@ -2295,6 +2537,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
(((tax->taxa)->taxon)[i].parent)->farest = 0;
}
// Initialize preferred names to NULL
for (i=0; i < (tax->taxa)->count; i++)
((tax->taxa)->taxon)[i].preferred_name = NULL;
(tax->taxa)->buffer_size = (tax->taxa)->count;
// Compute longest branches (used to compute distances between taxa faster)
@ -2328,7 +2574,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
}
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
{
int32_t taxid;
ecotx_t* taxon;
@ -2436,6 +2682,81 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const
}
/**
 * Adds a preferred name to the taxon identified by a taxid.
 *
 * @param tax            Taxonomy containing the taxon.
 * @param taxid          Taxid of the taxon to name.
 * @param preferred_name New preferred name.
 *
 * @returns The value returned by obi_taxo_add_preferred_name_with_taxon(),
 *          or -1 if no taxon was found for `taxid`.
 */
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
{
	ecotx_t* taxon;

	taxon = obi_taxo_get_taxon_with_taxid(tax, taxid);
	// The lookup returns NULL for an unknown taxid; passing it on would
	// dereference a NULL pointer
	if (taxon == NULL)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError adding a preferred name: taxon not found");
		return -1;
	}

	return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name);
}
/**
 * Sets the preferred name of a taxon and records it in the preferred names
 * index of the taxonomy.
 *
 * The name is copied twice: once into `taxon->preferred_name` (any previous
 * value is freed) and once into the `tax->preferred_names` index. If the
 * index already holds an entry for this taxon, that entry is renamed instead
 * of adding a second one, so that the index keeps one preferred name per
 * taxon. The index is kept sorted with cmp_names().
 *
 * All allocations are done before anything is modified: on failure, the
 * taxon and the index are left as they were.
 *
 * @param tax            Taxonomy owning the preferred names index.
 * @param taxon          Taxon to name (must belong to `tax`).
 * @param preferred_name New preferred name (copied, the caller keeps ownership).
 *
 * @returns 0 on success, -1 if an argument is NULL or an allocation failed.
 */
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
{
	econameidx_t* names_idx;
	econame_t*    name_struct;
	char*         taxon_copy;
	char*         index_copy;
	char*         class_copy;
	size_t        name_size;
	int32_t       n;
	int32_t       i;

	if ((tax == NULL) || (taxon == NULL) || (preferred_name == NULL))
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError adding a preferred name: NULL taxonomy, taxon or name");
		return -1;
	}

	name_size = strlen(preferred_name) + 1;

	// Look for an entry already associated with this taxon
	name_struct = NULL;
	n = (tax->preferred_names == NULL) ? 0 : (tax->preferred_names)->count;
	for (i=0; i < n; i++)
	{
		if ((tax->preferred_names)->names[i].taxon == taxon)
		{
			name_struct = (tax->preferred_names)->names + i;
			break;
		}
	}

	// Copy the name before freeing anything: preferred_name may be the
	// taxon's current preferred name
	taxon_copy = (char*) malloc(name_size * sizeof(char));
	index_copy = (char*) malloc(name_size * sizeof(char));
	if ((taxon_copy == NULL) || (index_copy == NULL))
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a new preferred name for a taxon");
		free(taxon_copy);
		free(index_copy);
		return -1;
	}
	strcpy(taxon_copy, preferred_name);
	strcpy(index_copy, preferred_name);

	if (name_struct != NULL)
	{
		// Rename the existing entry, its class name and taxon stay valid
		free(name_struct->name);
		name_struct->name = index_copy;
	}
	else
	{
		class_copy = (char*) malloc((strlen("preferred name") + 1) * sizeof(char));
		if (class_copy == NULL)
		{
			obi_set_errno(OBI_MALLOC_ERROR);
			obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name");
			free(taxon_copy);
			free(index_copy);
			return -1;
		}
		strcpy(class_copy, "preferred name");

		// Make room for one more entry. realloc() on a NULL pointer behaves
		// like malloc(), which covers the first preferred name. The result
		// goes through a temporary so that the current index is not lost
		// if the reallocation fails.
		names_idx = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * ((size_t) n + 1));
		if (names_idx == NULL)
		{
			obi_set_errno(OBI_MALLOC_ERROR);
			obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name");
			free(taxon_copy);
			free(index_copy);
			free(class_copy);
			return -1;
		}
		tax->preferred_names = names_idx;

		// Add new preferred name
		name_struct = names_idx->names + n;
		name_struct->name = index_copy;
		name_struct->class_name = class_copy;
		name_struct->is_scientific_name = false;
		name_struct->taxon = taxon;

		// Update preferred name count before sorting so that the new entry is sorted too
		names_idx->count = n + 1;
	}

	// Replace the preferred name stored in the taxon (free(NULL) is a no-op)
	free(taxon->preferred_name);
	taxon->preferred_name = taxon_copy;

	// Sort preferred names in alphabetical order
	qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names);

	return 0;
}
/////// PUBLIC /////////
@ -2448,6 +2769,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
char* merged_idx_file_name;
char* local_taxa_file_name;
char* alter_names_file_name;
char* pref_names_file_name;
int buffer_size;
tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
@ -2461,6 +2783,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
tax->ranks = NULL;
tax->taxa = NULL;
tax->names = NULL;
tax->preferred_names = NULL;
tax->merged_idx = NULL;
tax->dms = dms;
@ -2492,7 +2815,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
free(tax);
return NULL;
}
tax->ranks = read_rankidx(ranks_file_name);
tax->ranks = read_ranks_idx(ranks_file_name);
if (tax->ranks == NULL)
{
obi_set_errno(OBI_TAXONOMY_ERROR);
@ -2543,7 +2866,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
obi_close_taxonomy(tax);
return NULL;
}
tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name);
tax->taxa = read_taxonomy_idx(taxa_file_name, local_taxa_file_name);
if (tax->taxa == NULL)
{
free(taxonomy_path);
@ -2574,7 +2897,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
obi_close_taxonomy(tax);
return NULL;
}
tax->merged_idx = read_mergedidx(merged_idx_file_name, tax);
tax->merged_idx = read_merged_idx(merged_idx_file_name, tax);
if (tax->merged_idx == NULL)
{
free(taxonomy_path);
@ -2584,6 +2907,38 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
}
free(merged_idx_file_name);
// Read preferred names
pref_names_file_name = (char*) malloc(buffer_size*sizeof(char));
if (pref_names_file_name == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for alternative names file name");
free(taxonomy_path);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(pref_names_file_name, buffer_size, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
{
obi_set_errno(OBI_TAXONOMY_ERROR);
obidebug(1, "\nError building alternative names file name");
free(taxonomy_path);
free(pref_names_file_name);
obi_close_taxonomy(tax);
return NULL;
}
tax->preferred_names = read_preferred_names_idx(pref_names_file_name, tax);
if (obi_errno)
{
free(taxonomy_path);
free(pref_names_file_name);
obi_close_taxonomy(tax);
return NULL;
}
free(pref_names_file_name);
if (tax->preferred_names != NULL)
fprintf(stderr, "\nPreferred names read");
// Read alternative names
if (read_alternative_names)
{
@ -2605,7 +2960,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
obi_close_taxonomy(tax);
return NULL;
}
tax->names = read_nameidx(alter_names_file_name, tax);
tax->names = read_names_idx(alter_names_file_name, tax);
if (tax->names == NULL)
{
free(taxonomy_path);
@ -2637,6 +2992,10 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
return -1;
}
// Write preferred names if there are some
if (taxonomy->preferred_names != NULL)
if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
return -1;
if (taxonomy)
{

View File

@ -36,7 +36,8 @@ typedef struct ecotxnode {
int32_t farest;
int32_t idx;
struct ecotxnode* parent;
char* name;
char* name; // scientific name
char* preferred_name; // preferred name
bool local;
} ecotx_t;
@ -98,6 +99,7 @@ typedef struct OBIDMS_taxonomy_t {
ecomergedidx_t* merged_idx;
ecorankidx_t* ranks;
econameidx_t* names;
econameidx_t* preferred_names;
ecotxidx_t* taxa;
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
@ -127,4 +129,11 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);