Taxonomy: adding, writing and reading preferred names, changed some
function names, and fixed a bug with taxa indices not being properly initialized
This commit is contained in:
@@ -18,3 +18,4 @@ cdef class OBI_Taxonomy :
|
||||
cdef class OBI_Taxon :
|
||||
|
||||
cdef ecotx_t* _pointer
|
||||
cdef OBI_Taxonomy _tax
|
||||
|
||||
@@ -7,10 +7,10 @@ from .capi.obitaxonomy cimport obi_read_taxonomy, \
|
||||
obi_write_taxonomy, \
|
||||
obi_close_taxonomy, \
|
||||
obi_taxo_get_taxon_with_taxid, \
|
||||
obi_taxonomy_add_local_taxon, \
|
||||
obi_taxo_add_local_taxon, \
|
||||
obi_taxo_add_preferred_name_with_taxon, \
|
||||
ecotx_t
|
||||
|
||||
|
||||
from ._obidms cimport OBIDMS
|
||||
|
||||
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
||||
@@ -42,7 +42,7 @@ cdef class OBI_Taxonomy :
|
||||
if taxon_p == NULL :
|
||||
raise Exception("Taxon not found")
|
||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||
return OBI_Taxon(taxon_capsule)
|
||||
return OBI_Taxon(taxon_capsule, self)
|
||||
else :
|
||||
raise Exception("Not implemented")
|
||||
|
||||
@@ -60,7 +60,7 @@ cdef class OBI_Taxonomy :
|
||||
for t in range(self._pointer.taxa.count):
|
||||
taxon_p = <ecotx_t*> (taxa+t)
|
||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||
yield OBI_Taxon(taxon_capsule)
|
||||
yield OBI_Taxon(taxon_capsule, self)
|
||||
|
||||
|
||||
cpdef write(self, str prefix) :
|
||||
@@ -70,7 +70,7 @@ cdef class OBI_Taxonomy :
|
||||
|
||||
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
|
||||
cdef int taxid
|
||||
taxid = obi_taxonomy_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
|
||||
taxid = obi_taxo_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
|
||||
if taxid < 0 :
|
||||
raise Exception("Error adding a new taxon to the taxonomy")
|
||||
else :
|
||||
@@ -85,10 +85,11 @@ cdef class OBI_Taxonomy :
|
||||
|
||||
cdef class OBI_Taxon : # TODO dict subclass?
|
||||
|
||||
def __init__(self, object taxon_capsule) :
|
||||
def __init__(self, object taxon_capsule, OBI_Taxonomy tax) :
|
||||
self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
|
||||
if self._pointer == NULL :
|
||||
raise Exception("Error reading the taxonomy")
|
||||
raise Exception("Error reading a taxon (NULL pointer)")
|
||||
self._tax = tax
|
||||
|
||||
# name property getter
|
||||
@property
|
||||
@@ -115,12 +116,23 @@ cdef class OBI_Taxon : # TODO dict subclass?
|
||||
def parent(self):
|
||||
cdef object parent_capsule
|
||||
parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
|
||||
return OBI_Taxon(parent_capsule)
|
||||
return OBI_Taxon(parent_capsule, self._tax)
|
||||
|
||||
# preferred name property getter and setter
|
||||
@property
def preferred_name(self):
    """Preferred name of the taxon as a str, or None when the C taxon has no preferred name set."""
    # The C field is a NULL pointer when no preferred name is stored for this taxon.
    if self._pointer.preferred_name == NULL :
        return None
    return bytes2str(self._pointer.preferred_name)
|
||||
@preferred_name.setter
def preferred_name(self, str new_preferred_name) :   # @DuplicatedSignature
    """Set the preferred name of the taxon through the C taxonomy API.

    Raises an Exception when the C call reports a failure (negative return value).
    """
    cdef int status
    status = obi_taxo_add_preferred_name_with_taxon(self._tax._pointer, self._pointer, str2bytes(new_preferred_name))
    if status < 0 :
        raise Exception("Error adding a new preferred name to a taxon")
|
||||
|
||||
def __repr__(self):
    """String form of a dict summarising the taxon: taxid, name, preferred name, parent taxid and farest."""
    # Keys are listed in the same order the properties were originally read.
    summary = {
        'taxid': self.taxid,
        'name': self.name,
        'preferred name': self.preferred_name,
        'parent': self.parent.taxid,
        'farest': self.farest,
    }
    return str(summary)
|
||||
|
||||
@@ -13,6 +13,7 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
int32_t farest
|
||||
ecotxnode* parent
|
||||
char* name
|
||||
char* preferred_name
|
||||
|
||||
ctypedef ecotxnode ecotx_t
|
||||
|
||||
@@ -56,4 +57,9 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||
|
||||
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
|
||||
|
||||
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
|
||||
|
||||
|
||||
+380
-21
@@ -246,7 +246,7 @@ FILE* open_ecorecorddb(const char* file_name,
|
||||
}
|
||||
|
||||
|
||||
ecorankidx_t* read_rankidx(const char* ranks_file_name)
|
||||
ecorankidx_t* read_ranks_idx(const char* ranks_file_name)
|
||||
{
|
||||
int32_t count;
|
||||
FILE* ranks_file;
|
||||
@@ -301,7 +301,7 @@ ecorankidx_t* read_rankidx(const char* ranks_file_name)
|
||||
}
|
||||
|
||||
|
||||
ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name)
|
||||
ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name)
|
||||
{
|
||||
int32_t count_taxa;
|
||||
int32_t count_local_taxa;
|
||||
@@ -341,10 +341,12 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
|
||||
for (i=0; i<count_taxa; i++)
|
||||
{
|
||||
readnext_ecotaxon(f_taxa, &(taxa_index->taxon[i]));
|
||||
taxa_index->taxon[i].idx = i;
|
||||
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
|
||||
taxa_index->taxon[i].parent->farest = 0;
|
||||
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
|
||||
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
|
||||
taxa_index->taxon[i].preferred_name = NULL;
|
||||
}
|
||||
|
||||
if (count_local_taxa > 0)
|
||||
@@ -361,6 +363,7 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
|
||||
taxa_index->taxon[i].parent->farest=0;
|
||||
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
|
||||
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
|
||||
taxa_index->taxon[i].preferred_name = NULL;
|
||||
}
|
||||
|
||||
for (i=0; i < count_taxa; i++)
|
||||
@@ -431,7 +434,60 @@ econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy
|
||||
}
|
||||
|
||||
|
||||
econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
||||
/*
 * Reads the next preferred-name record from `f` into `name` and copies the
 * name into the `preferred_name` field of the taxon the record points to.
 *
 * f        : record file, positioned on the next record.
 * name     : caller-provided structure that receives heap copies of the name
 *            and of its class name, the scientific-name flag and a pointer
 *            to the taxon.
 * taxonomy : taxonomy whose taxa array is indexed by the record.
 *
 * Returns `name` on success, NULL on failure. On failure nothing allocated
 * here is left attached to `name` (its string pointers are reset to NULL).
 */
econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
{
    econameformat_t* raw;
    ecotx_t*         taxon;
    char*            taxon_name_copy;
    int32_t          record_length;

    raw = read_ecorecord(f, &record_length);
    if (raw == NULL)
        return NULL;

    name->name = NULL;
    name->class_name = NULL;

    // The record stores the taxon as an index in the taxa array: refuse
    // indices or lengths that would read or write out of bounds.
    if ((raw->taxid < 0) || (raw->taxid >= taxonomy->taxa->count) ||
        (raw->name_length < 0) || (raw->class_length < 0))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxon preferred name: invalid record");
        goto error;
    }

    name->is_scientific_name = raw->is_scientific_name;

    name->name = malloc((raw->name_length + 1) * sizeof(char));
    if (name->name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a taxon preferred name");
        goto error;
    }
    strncpy(name->name, raw->names, raw->name_length);
    name->name[raw->name_length] = 0;

    // The class name is stored right after the name in the raw record
    name->class_name = malloc((raw->class_length + 1) * sizeof(char));
    if (name->class_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a taxon class name");
        goto error;
    }
    strncpy(name->class_name, (raw->names + raw->name_length), raw->class_length);
    name->class_name[raw->class_length] = 0;

    taxon = taxonomy->taxa->taxon + raw->taxid;
    name->taxon = taxon;

    // Add the preferred name in the taxon structure   // TODO discuss: couldn't they all use the same pointer?
    taxon_name_copy = malloc((raw->name_length + 1) * sizeof(char));
    if (taxon_name_copy == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a taxon preferred name");
        goto error;
    }
    strcpy(taxon_name_copy, name->name);

    // Release a name set by an earlier record for the same taxon instead of
    // leaking it (free(NULL) is a no-op when none was set).
    free(taxon->preferred_name);
    taxon->preferred_name = taxon_name_copy;

    return name;

error:
    free(name->name);
    free(name->class_name);
    name->name = NULL;
    name->class_name = NULL;
    // NOTE(review): the raw record is released on error paths only, as before;
    // the success path never frees it. Confirm who owns the buffer returned by
    // read_ecorecord().
    free(raw);
    return NULL;
}
|
||||
|
||||
|
||||
econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
||||
{
|
||||
int32_t count;
|
||||
FILE* f;
|
||||
@@ -440,10 +496,7 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
f = open_ecorecorddb(file_name, &count, 0);
|
||||
if (f == NULL)
|
||||
{
|
||||
obidebug(1, "\nError reading taxonomy name file");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
|
||||
if (index_names == NULL)
|
||||
@@ -473,9 +526,46 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
||||
}
|
||||
|
||||
|
||||
/*
 * Reads the preferred names file of a taxonomy and builds the corresponding
 * index. Each record is read with readnext_ecopreferredname(), which also
 * sets the preferred name in the taxon the record refers to.
 *
 * file_name : path of the preferred names file.
 * taxonomy  : taxonomy whose taxa receive the preferred names.
 *
 * Returns the newly allocated index, or NULL if the file could not be opened
 * or if an error occurred while reading it. The file is closed on every path.
 */
econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
    int32_t       count;
    FILE*         f;
    econameidx_t* index_names;
    int32_t       i;
    int32_t       j;

    f = open_ecorecorddb(file_name, &count, 0);
    if (f == NULL)
        return NULL;

    index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
    if (index_names == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError reading taxonomy name file");
        fclose(f);
        return NULL;
    }

    index_names->count = count;

    for (i=0; i < count; i++)
    {
        // The reader reports failure through its return value: the address
        // of the i-th slot itself can never be NULL.
        if (readnext_ecopreferredname(f, (index_names->names)+i, taxonomy) == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading taxonomy name file");
            // Release the strings owned by the records read so far
            for (j=0; j < i; j++)
            {
                free(index_names->names[j].name);
                free(index_names->names[j].class_name);
            }
            free(index_names);
            fclose(f);
            return NULL;
        }
    }

    fclose(f);

    return index_names;
}
|
||||
|
||||
|
||||
ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
||||
ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
||||
{
|
||||
int32_t count;
|
||||
FILE* f;
|
||||
@@ -528,7 +618,7 @@ ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy
|
||||
|
||||
// Functions to write taxonomy structure to binary files
|
||||
|
||||
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
|
||||
int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
|
||||
{
|
||||
int i;
|
||||
char* file_name;
|
||||
@@ -631,7 +721,7 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
|
||||
}
|
||||
|
||||
|
||||
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
||||
int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
||||
{
|
||||
int i;
|
||||
char* file_name;
|
||||
@@ -905,7 +995,7 @@ int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* ta
|
||||
}
|
||||
|
||||
|
||||
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
||||
int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
||||
{
|
||||
int i;
|
||||
char* file_name;
|
||||
@@ -1053,7 +1143,155 @@ int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
|
||||
}
|
||||
|
||||
|
||||
int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
|
||||
int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
||||
{
|
||||
int i;
|
||||
char* file_name;
|
||||
int file_descriptor;
|
||||
off_t file_size;
|
||||
char* taxonomy_path;
|
||||
int32_t name_length;
|
||||
int32_t class_length;
|
||||
int32_t record_size;
|
||||
|
||||
// Compute file size
|
||||
file_size = sizeof(int32_t); // To store record count
|
||||
for (i=0; i < (tax->preferred_names)->count; i++)
|
||||
{
|
||||
file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length
|
||||
file_size = file_size + strlen(tax->preferred_names->names[i].name); // To store name
|
||||
file_size = file_size + strlen(tax->preferred_names->names[i].class_name); // To store name
|
||||
}
|
||||
|
||||
// Build the taxonomy directory path
|
||||
taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
|
||||
|
||||
file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
|
||||
if (file_name == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Build the file path
|
||||
if (sprintf(file_name, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError building a binary taxonomy file name");
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(taxonomy_path);
|
||||
|
||||
// Create file
|
||||
file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
|
||||
if (file_descriptor < 0)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError creating a binary taxonomy file");
|
||||
free(file_name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(file_name);
|
||||
|
||||
// Truncate the file to the right size
|
||||
if (ftruncate(file_descriptor, file_size) < 0)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError truncating a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write record count
|
||||
if (write(file_descriptor, &(tax->preferred_names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write records
|
||||
for (i=0; i < tax->preferred_names->count; i++)
|
||||
{
|
||||
name_length = strlen(tax->preferred_names->names[i].name);
|
||||
class_length = strlen(tax->preferred_names->names[i].class_name);
|
||||
record_size = 4*sizeof(int32_t) + name_length + class_length;
|
||||
|
||||
// Write record size
|
||||
if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
// Write if the name is a scientific name
|
||||
if (write(file_descriptor, &(tax->preferred_names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
// Write name length
|
||||
if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
// Write class length
|
||||
if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
// Write taxid index
|
||||
if (write(file_descriptor, &(tax->preferred_names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
// Write name
|
||||
if (write(file_descriptor, tax->preferred_names->names[i].name, name_length) < ((ssize_t) name_length))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
// Write class
|
||||
if (write(file_descriptor, tax->preferred_names->names[i].class_name, class_length) < ((ssize_t) class_length))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||
close(file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Close file
|
||||
if (close(file_descriptor) < 0)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError closing a pdx taxonomy file");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
|
||||
{
|
||||
int i;
|
||||
char* file_name;
|
||||
@@ -1182,19 +1420,22 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
|
||||
|
||||
free(taxonomy_path);
|
||||
|
||||
if (write_rankidx(dms, tax, tax_name) < 0)
|
||||
if (write_ranks_idx(dms, tax, tax_name) < 0)
|
||||
return -1;
|
||||
if (write_taxonomyidx(dms, tax, tax_name) < 0)
|
||||
if (write_taxonomy_idx(dms, tax, tax_name) < 0)
|
||||
return -1;
|
||||
if (write_nameidx(dms, tax, tax_name) < 0)
|
||||
if (write_names_idx(dms, tax, tax_name) < 0)
|
||||
return -1;
|
||||
if (write_mergedidx(dms, tax, tax_name) < 0)
|
||||
if (write_merged_idx(dms, tax, tax_name) < 0)
|
||||
return -1;
|
||||
// Check if there are local taxa (if so last taxon is local)
|
||||
if ((tax->taxa)->local_count > 0)
|
||||
if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
|
||||
return -1;
|
||||
|
||||
// Write preferred names if there are some
|
||||
if (tax->preferred_names != NULL)
|
||||
if (write_preferred_names_idx(dms, tax, tax_name) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2117,6 +2358,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
|
||||
tax->ranks = NULL;
|
||||
tax->taxa = NULL;
|
||||
tax->names = NULL;
|
||||
tax->preferred_names = NULL;
|
||||
tax->merged_idx = NULL;
|
||||
|
||||
tax->dms = NULL;
|
||||
@@ -2295,6 +2537,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
|
||||
(((tax->taxa)->taxon)[i].parent)->farest = 0;
|
||||
}
|
||||
|
||||
// Initialize preferred names to NULL
|
||||
for (i=0; i < (tax->taxa)->count; i++)
|
||||
((tax->taxa)->taxon)[i].preferred_name = NULL;
|
||||
|
||||
(tax->taxa)->buffer_size = (tax->taxa)->count;
|
||||
|
||||
// Compute longest branches (used to compute distances between taxa faster)
|
||||
@@ -2328,7 +2574,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
|
||||
}
|
||||
|
||||
|
||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||
{
|
||||
int32_t taxid;
|
||||
ecotx_t* taxon;
|
||||
@@ -2436,6 +2682,81 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const
|
||||
}
|
||||
|
||||
|
||||
/*
 * Sets the preferred name of the taxon identified by `taxid`.
 *
 * tax            : taxonomy containing the taxon.
 * taxid          : taxid of the taxon to rename.
 * preferred_name : new preferred name (copied).
 *
 * Returns the result of obi_taxo_add_preferred_name_with_taxon(), or -1 if
 * no taxon with this taxid is found in the taxonomy.
 */
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
{
    ecotx_t* taxon;

    taxon = obi_taxo_get_taxon_with_taxid(tax, taxid);
    if (taxon == NULL)
    {
        // The lookup returns NULL for an unknown taxid: do not pass it on,
        // the taxon is dereferenced when the name is added.
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError adding a preferred name: taxon not found in the taxonomy");
        return -1;
    }

    return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name);
}
|
||||
|
||||
|
||||
/*
 * Sets the preferred name of a taxon, both in the taxon structure and in the
 * preferred names index of the taxonomy.
 *
 * tax            : taxonomy owning the taxon and the preferred names index.
 * taxon          : taxon to rename.
 * preferred_name : new preferred name (copied; may alias the taxon's current
 *                  preferred name).
 *
 * If the index already holds an entry for this taxon, that entry's name is
 * replaced; otherwise the index grows by one entry of class "preferred name".
 * The index is then sorted with cmp_names. All allocations are done before
 * anything is modified, so on failure the taxonomy is left unchanged.
 *
 * Returns 0 on success, -1 on error.
 */
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
{
    static const char class_label[] = "preferred name";

    econameidx_t* names_idx;
    econame_t*    name_struct;
    char*         taxon_copy;
    char*         index_copy;
    char*         class_copy;
    size_t        name_size;
    int32_t       i;

    if ((tax == NULL) || (taxon == NULL) || (preferred_name == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError adding a preferred name: NULL taxonomy, taxon or name");
        return -1;
    }

    name_size = strlen(preferred_name) + 1;

    // Copy of the name for the taxon structure. Made before the previous
    // name is freed, so passing the current preferred name is safe.
    taxon_copy = (char*) malloc(name_size * sizeof(char));
    if (taxon_copy == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a new preferred name for a taxon");
        return -1;
    }
    memcpy(taxon_copy, preferred_name, name_size);

    // Copy of the name for the preferred names structure
    index_copy = (char*) malloc(name_size * sizeof(char));
    if (index_copy == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a new taxon preferred name");
        free(taxon_copy);
        return -1;
    }
    memcpy(index_copy, preferred_name, name_size);

    // Look for an entry already associated with this taxon, so that renaming
    // a taxon does not leave a stale name in the index
    name_struct = NULL;
    if (tax->preferred_names != NULL)
    {
        for (i=0; i < (tax->preferred_names)->count; i++)
        {
            if ((tax->preferred_names)->names[i].taxon == taxon)
            {
                name_struct = (tax->preferred_names)->names + i;
                break;
            }
        }
    }

    if (name_struct != NULL)
    {
        // Replace the name of the existing entry
        free(name_struct->name);
        name_struct->name = index_copy;
    }
    else
    {
        class_copy = (char*) malloc(sizeof(class_label));
        if (class_copy == NULL)
        {
            obi_set_errno(OBI_MALLOC_ERROR);
            obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name");
            free(index_copy);
            free(taxon_copy);
            return -1;
        }
        memcpy(class_copy, class_label, sizeof(class_label));

        // Allocate or reallocate memory for the new entry. The result goes
        // through a temporary so the current index survives a failure.
        if (tax->preferred_names == NULL)
        {
            names_idx = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t));
            if (names_idx != NULL)
                names_idx->count = 0;
        }
        else
            names_idx = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * ((size_t) (tax->preferred_names)->count + 1));
        if (names_idx == NULL)
        {
            obi_set_errno(OBI_MALLOC_ERROR);
            obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name");
            free(class_copy);
            free(index_copy);
            free(taxon_copy);
            return -1;
        }
        tax->preferred_names = names_idx;

        // Add new preferred name
        name_struct = names_idx->names + names_idx->count;
        name_struct->name = index_copy;
        name_struct->class_name = class_copy;
        name_struct->is_scientific_name = false;
        name_struct->taxon = taxon;

        // Update preferred name count before sorting, so that the new entry
        // is part of the sorted range
        (names_idx->count)++;
    }

    // Replace the preferred name in the taxon structure (free(NULL) is a no-op)
    free(taxon->preferred_name);
    taxon->preferred_name = taxon_copy;

    // Sort preferred names in alphabetical order
    qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names);

    return 0;
}
|
||||
|
||||
|
||||
/////// PUBLIC /////////
|
||||
|
||||
|
||||
@@ -2448,6 +2769,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
||||
char* merged_idx_file_name;
|
||||
char* local_taxa_file_name;
|
||||
char* alter_names_file_name;
|
||||
char* pref_names_file_name;
|
||||
int buffer_size;
|
||||
|
||||
tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
|
||||
@@ -2461,6 +2783,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
||||
tax->ranks = NULL;
|
||||
tax->taxa = NULL;
|
||||
tax->names = NULL;
|
||||
tax->preferred_names = NULL;
|
||||
tax->merged_idx = NULL;
|
||||
|
||||
tax->dms = dms;
|
||||
@@ -2492,7 +2815,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
||||
free(tax);
|
||||
return NULL;
|
||||
}
|
||||
tax->ranks = read_rankidx(ranks_file_name);
|
||||
tax->ranks = read_ranks_idx(ranks_file_name);
|
||||
if (tax->ranks == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
@@ -2543,7 +2866,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name);
|
||||
tax->taxa = read_taxonomy_idx(taxa_file_name, local_taxa_file_name);
|
||||
if (tax->taxa == NULL)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
@@ -2574,7 +2897,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
tax->merged_idx = read_mergedidx(merged_idx_file_name, tax);
|
||||
tax->merged_idx = read_merged_idx(merged_idx_file_name, tax);
|
||||
if (tax->merged_idx == NULL)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
@@ -2584,6 +2907,38 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
||||
}
|
||||
free(merged_idx_file_name);
|
||||
|
||||
// Read preferred names
|
||||
pref_names_file_name = (char*) malloc(buffer_size*sizeof(char));
|
||||
if (pref_names_file_name == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError allocating memory for alternative names file name");
|
||||
free(taxonomy_path);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
if (snprintf(pref_names_file_name, buffer_size, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError building alternative names file name");
|
||||
free(taxonomy_path);
|
||||
free(pref_names_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
tax->preferred_names = read_preferred_names_idx(pref_names_file_name, tax);
|
||||
if (obi_errno)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
free(pref_names_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
free(pref_names_file_name);
|
||||
|
||||
if (tax->preferred_names != NULL)
|
||||
fprintf(stderr, "\nPreferred names read");
|
||||
|
||||
// Read alternative names
|
||||
if (read_alternative_names)
|
||||
{
|
||||
@@ -2605,7 +2960,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
tax->names = read_nameidx(alter_names_file_name, tax);
|
||||
tax->names = read_names_idx(alter_names_file_name, tax);
|
||||
if (tax->names == NULL)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
@@ -2637,6 +2992,10 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
||||
if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
|
||||
return -1;
|
||||
}
|
||||
// Write preferred names if there are some
|
||||
if (taxonomy->preferred_names != NULL)
|
||||
if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
|
||||
return -1;
|
||||
|
||||
if (taxonomy)
|
||||
{
|
||||
|
||||
+11
-2
@@ -36,7 +36,8 @@ typedef struct ecotxnode {
|
||||
int32_t farest;
|
||||
int32_t idx;
|
||||
struct ecotxnode* parent;
|
||||
char* name;
|
||||
char* name; // scientific name
|
||||
char* preferred_name; // preferred name
|
||||
bool local;
|
||||
} ecotx_t;
|
||||
|
||||
@@ -98,6 +99,7 @@ typedef struct OBIDMS_taxonomy_t {
|
||||
ecomergedidx_t* merged_idx;
|
||||
ecorankidx_t* ranks;
|
||||
econameidx_t* names;
|
||||
econameidx_t* preferred_names;
|
||||
ecotxidx_t* taxa;
|
||||
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
||||
|
||||
@@ -127,4 +129,11 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
||||
|
||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
||||
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
||||
|
||||
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
|
||||
|
||||
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user