Taxonomy: add, write and read preferred names; rename some functions;
and fix a bug where taxa indices were not properly initialized
This commit is contained in:
@ -18,3 +18,4 @@ cdef class OBI_Taxonomy :
|
|||||||
cdef class OBI_Taxon :
|
cdef class OBI_Taxon :
|
||||||
|
|
||||||
cdef ecotx_t* _pointer
|
cdef ecotx_t* _pointer
|
||||||
|
cdef OBI_Taxonomy _tax
|
||||||
|
@ -7,10 +7,10 @@ from .capi.obitaxonomy cimport obi_read_taxonomy, \
|
|||||||
obi_write_taxonomy, \
|
obi_write_taxonomy, \
|
||||||
obi_close_taxonomy, \
|
obi_close_taxonomy, \
|
||||||
obi_taxo_get_taxon_with_taxid, \
|
obi_taxo_get_taxon_with_taxid, \
|
||||||
obi_taxonomy_add_local_taxon, \
|
obi_taxo_add_local_taxon, \
|
||||||
|
obi_taxo_add_preferred_name_with_taxon, \
|
||||||
ecotx_t
|
ecotx_t
|
||||||
|
|
||||||
|
|
||||||
from ._obidms cimport OBIDMS
|
from ._obidms cimport OBIDMS
|
||||||
|
|
||||||
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
||||||
@ -42,7 +42,7 @@ cdef class OBI_Taxonomy :
|
|||||||
if taxon_p == NULL :
|
if taxon_p == NULL :
|
||||||
raise Exception("Taxon not found")
|
raise Exception("Taxon not found")
|
||||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||||
return OBI_Taxon(taxon_capsule)
|
return OBI_Taxon(taxon_capsule, self)
|
||||||
else :
|
else :
|
||||||
raise Exception("Not implemented")
|
raise Exception("Not implemented")
|
||||||
|
|
||||||
@ -60,7 +60,7 @@ cdef class OBI_Taxonomy :
|
|||||||
for t in range(self._pointer.taxa.count):
|
for t in range(self._pointer.taxa.count):
|
||||||
taxon_p = <ecotx_t*> (taxa+t)
|
taxon_p = <ecotx_t*> (taxa+t)
|
||||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||||
yield OBI_Taxon(taxon_capsule)
|
yield OBI_Taxon(taxon_capsule, self)
|
||||||
|
|
||||||
|
|
||||||
cpdef write(self, str prefix) :
|
cpdef write(self, str prefix) :
|
||||||
@ -70,7 +70,7 @@ cdef class OBI_Taxonomy :
|
|||||||
|
|
||||||
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
|
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
|
||||||
cdef int taxid
|
cdef int taxid
|
||||||
taxid = obi_taxonomy_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
|
taxid = obi_taxo_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
|
||||||
if taxid < 0 :
|
if taxid < 0 :
|
||||||
raise Exception("Error adding a new taxon to the taxonomy")
|
raise Exception("Error adding a new taxon to the taxonomy")
|
||||||
else :
|
else :
|
||||||
@ -85,10 +85,11 @@ cdef class OBI_Taxonomy :
|
|||||||
|
|
||||||
cdef class OBI_Taxon : # TODO dict subclass?
|
cdef class OBI_Taxon : # TODO dict subclass?
|
||||||
|
|
||||||
def __init__(self, object taxon_capsule) :
|
def __init__(self, object taxon_capsule, OBI_Taxonomy tax) :
|
||||||
self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
|
self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
|
||||||
if self._pointer == NULL :
|
if self._pointer == NULL :
|
||||||
raise Exception("Error reading the taxonomy")
|
raise Exception("Error reading a taxon (NULL pointer)")
|
||||||
|
self._tax = tax
|
||||||
|
|
||||||
# name property getter
|
# name property getter
|
||||||
@property
|
@property
|
||||||
@ -115,12 +116,23 @@ cdef class OBI_Taxon : # TODO dict subclass?
|
|||||||
def parent(self):
|
def parent(self):
|
||||||
cdef object parent_capsule
|
cdef object parent_capsule
|
||||||
parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
|
parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
|
||||||
return OBI_Taxon(parent_capsule)
|
return OBI_Taxon(parent_capsule, self._tax)
|
||||||
|
|
||||||
|
# Preferred name of the taxon, exposed as a read/write property.
@property
def preferred_name(self):
    # No preferred name stored in the C structure: the getter yields None
    if self._pointer.preferred_name == NULL :
        return None
    return bytes2str(self._pointer.preferred_name)

@preferred_name.setter
def preferred_name(self, str new_preferred_name) :   # @DuplicatedSignature
    # The C function reports failure with a negative return value
    status = obi_taxo_add_preferred_name_with_taxon(self._tax._pointer, self._pointer, str2bytes(new_preferred_name))
    if status < 0 :
        raise Exception("Error adding a new preferred name to a taxon")
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
d = {}
|
d = {}
|
||||||
d['taxid'] = self.taxid
|
d['taxid'] = self.taxid
|
||||||
d['name'] = self.name
|
d['name'] = self.name
|
||||||
|
d['preferred name'] = self.preferred_name
|
||||||
d['parent'] = self.parent.taxid
|
d['parent'] = self.parent.taxid
|
||||||
d['farest'] = self.farest
|
d['farest'] = self.farest
|
||||||
return str(d)
|
return str(d)
|
||||||
|
@ -13,6 +13,7 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
|||||||
int32_t farest
|
int32_t farest
|
||||||
ecotxnode* parent
|
ecotxnode* parent
|
||||||
char* name
|
char* name
|
||||||
|
char* preferred_name
|
||||||
|
|
||||||
ctypedef ecotxnode ecotx_t
|
ctypedef ecotxnode ecotx_t
|
||||||
|
|
||||||
@ -56,4 +57,9 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
|||||||
|
|
||||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||||
|
|
||||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||||
|
|
||||||
|
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
|
||||||
|
|
||||||
|
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
|
||||||
|
|
||||||
|
@ -246,7 +246,7 @@ FILE* open_ecorecorddb(const char* file_name,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ecorankidx_t* read_rankidx(const char* ranks_file_name)
|
ecorankidx_t* read_ranks_idx(const char* ranks_file_name)
|
||||||
{
|
{
|
||||||
int32_t count;
|
int32_t count;
|
||||||
FILE* ranks_file;
|
FILE* ranks_file;
|
||||||
@ -301,7 +301,7 @@ ecorankidx_t* read_rankidx(const char* ranks_file_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name)
|
ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name)
|
||||||
{
|
{
|
||||||
int32_t count_taxa;
|
int32_t count_taxa;
|
||||||
int32_t count_local_taxa;
|
int32_t count_local_taxa;
|
||||||
@ -341,10 +341,12 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
|
|||||||
for (i=0; i<count_taxa; i++)
|
for (i=0; i<count_taxa; i++)
|
||||||
{
|
{
|
||||||
readnext_ecotaxon(f_taxa, &(taxa_index->taxon[i]));
|
readnext_ecotaxon(f_taxa, &(taxa_index->taxon[i]));
|
||||||
|
taxa_index->taxon[i].idx = i;
|
||||||
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
|
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
|
||||||
taxa_index->taxon[i].parent->farest = 0;
|
taxa_index->taxon[i].parent->farest = 0;
|
||||||
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
|
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
|
||||||
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
|
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
|
||||||
|
taxa_index->taxon[i].preferred_name = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (count_local_taxa > 0)
|
if (count_local_taxa > 0)
|
||||||
@ -361,6 +363,7 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
|
|||||||
taxa_index->taxon[i].parent->farest=0;
|
taxa_index->taxon[i].parent->farest=0;
|
||||||
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
|
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
|
||||||
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
|
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
|
||||||
|
taxa_index->taxon[i].preferred_name = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i=0; i < count_taxa; i++)
|
for (i=0; i < count_taxa; i++)
|
||||||
@ -431,7 +434,60 @@ econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
/**
 * Reads the next record of a binary preferred names file.
 *
 * The record is decoded into `name` (name, class name, scientific-name flag
 * and associated taxon), and a private copy of the name is stored in the
 * `preferred_name` field of the taxon the record refers to, replacing (and
 * freeing) any preferred name the taxon already had.
 *
 * @param f         The file to read the record from.
 * @param name      The name structure to fill.
 * @param taxonomy  The taxonomy whose taxa array is indexed by the record.
 *
 * @returns `name` on success, NULL if the record could not be read, if it
 *          refers to a taxon index outside the taxa array, or if a memory
 *          allocation failed.
 *
 * NOTE(review): as in the previous version, the buffer returned by
 * read_ecorecord() is freed on error paths only; confirm who owns it.
 */
econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
{
	econameformat_t* raw;
	ecotx_t*         taxon;
	char*            preferred_name;
	int32_t          record_length;

	raw = read_ecorecord(f, &record_length);
	if (raw == NULL)
		return NULL;

	// The record stores an index in the taxa array: refuse to point outside of it
	if ((raw->taxid < 0) || (raw->taxid >= taxonomy->taxa->count))
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError reading a taxon preferred name: taxon index out of range");
		free(raw);
		return NULL;
	}
	taxon = taxonomy->taxa->taxon + raw->taxid;

	name->is_scientific_name = raw->is_scientific_name;

	// The name and the class name are stored back to back, without terminators
	name->name = malloc((raw->name_length + 1) * sizeof(char));
	if (name->name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a taxon preferred name");
		free(raw);
		return NULL;
	}
	strncpy(name->name, raw->names, raw->name_length);
	name->name[raw->name_length] = 0;

	name->class_name = malloc((raw->class_length+1) * sizeof(char));
	if (name->class_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a taxon class name");
		free(name->name);
		free(raw);
		return NULL;
	}
	strncpy(name->class_name, (raw->names + raw->name_length), raw->class_length);
	name->class_name[raw->class_length] = 0;

	name->taxon = taxon;

	// Add the preferred name in the taxon structure		// TODO discuss: couldn't they all use the same pointer?
	// The copy is built first so that the taxon is only modified on success
	preferred_name = malloc((raw->name_length + 1) * sizeof(char));
	if (preferred_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a taxon preferred name");
		free(name->name);
		free(name->class_name);
		free(raw);
		return NULL;
	}
	strcpy(preferred_name, name->name);

	// Release a preferred name set by a previous record for the same taxon
	free(taxon->preferred_name);
	taxon->preferred_name = preferred_name;

	return name;
}
|
||||||
|
|
||||||
|
|
||||||
|
econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
||||||
{
|
{
|
||||||
int32_t count;
|
int32_t count;
|
||||||
FILE* f;
|
FILE* f;
|
||||||
@ -440,10 +496,7 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
|||||||
|
|
||||||
f = open_ecorecorddb(file_name, &count, 0);
|
f = open_ecorecorddb(file_name, &count, 0);
|
||||||
if (f == NULL)
|
if (f == NULL)
|
||||||
{
|
|
||||||
obidebug(1, "\nError reading taxonomy name file");
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
|
||||||
|
|
||||||
index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
|
index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
|
||||||
if (index_names == NULL)
|
if (index_names == NULL)
|
||||||
@ -473,9 +526,46 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Reads a binary preferred names file into a newly allocated names index.
 *
 * Reading each record also stores the preferred name in the corresponding
 * taxon of `taxonomy` (done by readnext_ecopreferredname()).
 *
 * @param file_name  The path of the preferred names file.
 * @param taxonomy   The taxonomy the preferred names belong to.
 *
 * @returns The names index, or NULL if the file could not be opened, if
 *          memory could not be allocated or if a record could not be read.
 *          Failing to open the file is not reported through obi_set_errno()
 *          by this function.
 */
econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
	int32_t       count;
	FILE*         f;
	econameidx_t* index_names;
	int32_t       i;
	int32_t       j;

	f = open_ecorecorddb(file_name, &count, 0);
	if (f == NULL)
		return NULL;

	index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
	if (index_names == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError reading taxonomy name file");
		fclose(f);
		return NULL;
	}

	index_names->count = count;

	for (i=0; i < count; i++)
	{
		// The reader signals failure through its return value, not through its argument
		if (readnext_ecopreferredname(f, (index_names->names)+i, taxonomy) == NULL)
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError reading taxonomy name file");
			// Release the strings of the records that were successfully read
			for (j=0; j < i; j++)
			{
				free(index_names->names[j].name);
				free(index_names->names[j].class_name);
			}
			free(index_names);
			fclose(f);
			return NULL;
		}
	}

	fclose(f);

	return index_names;
}
|
||||||
|
|
||||||
|
|
||||||
ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
|
||||||
{
|
{
|
||||||
int32_t count;
|
int32_t count;
|
||||||
FILE* f;
|
FILE* f;
|
||||||
@ -528,7 +618,7 @@ ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy
|
|||||||
|
|
||||||
// Functions to write taxonomy structure to binary files
|
// Functions to write taxonomy structure to binary files
|
||||||
|
|
||||||
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
|
int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
char* file_name;
|
char* file_name;
|
||||||
@ -631,7 +721,7 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
char* file_name;
|
char* file_name;
|
||||||
@ -905,7 +995,7 @@ int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* ta
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
char* file_name;
|
char* file_name;
|
||||||
@ -1053,7 +1143,155 @@ int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
|
int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
char* file_name;
|
||||||
|
int file_descriptor;
|
||||||
|
off_t file_size;
|
||||||
|
char* taxonomy_path;
|
||||||
|
int32_t name_length;
|
||||||
|
int32_t class_length;
|
||||||
|
int32_t record_size;
|
||||||
|
|
||||||
|
// Compute file size
|
||||||
|
file_size = sizeof(int32_t); // To store record count
|
||||||
|
for (i=0; i < (tax->preferred_names)->count; i++)
|
||||||
|
{
|
||||||
|
file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length
|
||||||
|
file_size = file_size + strlen(tax->preferred_names->names[i].name); // To store name
|
||||||
|
file_size = file_size + strlen(tax->preferred_names->names[i].class_name); // To store name
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the taxonomy directory path
|
||||||
|
taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
|
||||||
|
|
||||||
|
file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
|
||||||
|
if (file_name == NULL)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
|
obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the file path
|
||||||
|
if (sprintf(file_name, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError building a binary taxonomy file name");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(taxonomy_path);
|
||||||
|
|
||||||
|
// Create file
|
||||||
|
file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
|
||||||
|
if (file_descriptor < 0)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError creating a binary taxonomy file");
|
||||||
|
free(file_name);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(file_name);
|
||||||
|
|
||||||
|
// Truncate the file to the right size
|
||||||
|
if (ftruncate(file_descriptor, file_size) < 0)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError truncating a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write record count
|
||||||
|
if (write(file_descriptor, &(tax->preferred_names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write records
|
||||||
|
for (i=0; i < tax->preferred_names->count; i++)
|
||||||
|
{
|
||||||
|
name_length = strlen(tax->preferred_names->names[i].name);
|
||||||
|
class_length = strlen(tax->preferred_names->names[i].class_name);
|
||||||
|
record_size = 4*sizeof(int32_t) + name_length + class_length;
|
||||||
|
|
||||||
|
// Write record size
|
||||||
|
if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Write if the name is a scientific name
|
||||||
|
if (write(file_descriptor, &(tax->preferred_names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Write name length
|
||||||
|
if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Write class length
|
||||||
|
if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Write taxid index
|
||||||
|
if (write(file_descriptor, &(tax->preferred_names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Write name
|
||||||
|
if (write(file_descriptor, tax->preferred_names->names[i].name, name_length) < ((ssize_t) name_length))
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Write class
|
||||||
|
if (write(file_descriptor, tax->preferred_names->names[i].class_name, class_length) < ((ssize_t) class_length))
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError writing in a binary taxonomy file");
|
||||||
|
close(file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close file
|
||||||
|
if (close(file_descriptor) < 0)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError closing a pdx taxonomy file");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct?
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
char* file_name;
|
char* file_name;
|
||||||
@ -1182,19 +1420,22 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
|
|||||||
|
|
||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
|
|
||||||
if (write_rankidx(dms, tax, tax_name) < 0)
|
if (write_ranks_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if (write_taxonomyidx(dms, tax, tax_name) < 0)
|
if (write_taxonomy_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if (write_nameidx(dms, tax, tax_name) < 0)
|
if (write_names_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if (write_mergedidx(dms, tax, tax_name) < 0)
|
if (write_merged_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
// Check if there are local taxa (if so last taxon is local)
|
// Check if there are local taxa (if so last taxon is local)
|
||||||
if ((tax->taxa)->local_count > 0)
|
if ((tax->taxa)->local_count > 0)
|
||||||
if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
|
if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
// Write preferred names if there are some
|
||||||
|
if (tax->preferred_names != NULL)
|
||||||
|
if (write_preferred_names_idx(dms, tax, tax_name) < 0)
|
||||||
|
return -1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2117,6 +2358,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
|
|||||||
tax->ranks = NULL;
|
tax->ranks = NULL;
|
||||||
tax->taxa = NULL;
|
tax->taxa = NULL;
|
||||||
tax->names = NULL;
|
tax->names = NULL;
|
||||||
|
tax->preferred_names = NULL;
|
||||||
tax->merged_idx = NULL;
|
tax->merged_idx = NULL;
|
||||||
|
|
||||||
tax->dms = NULL;
|
tax->dms = NULL;
|
||||||
@ -2295,6 +2537,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
|
|||||||
(((tax->taxa)->taxon)[i].parent)->farest = 0;
|
(((tax->taxa)->taxon)[i].parent)->farest = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize preferred names to NULL
|
||||||
|
for (i=0; i < (tax->taxa)->count; i++)
|
||||||
|
((tax->taxa)->taxon)[i].preferred_name = NULL;
|
||||||
|
|
||||||
(tax->taxa)->buffer_size = (tax->taxa)->count;
|
(tax->taxa)->buffer_size = (tax->taxa)->count;
|
||||||
|
|
||||||
// Compute longest branches (used to compute distances between taxa faster)
|
// Compute longest branches (used to compute distances between taxa faster)
|
||||||
@ -2328,7 +2574,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||||
{
|
{
|
||||||
int32_t taxid;
|
int32_t taxid;
|
||||||
ecotx_t* taxon;
|
ecotx_t* taxon;
|
||||||
@ -2436,6 +2682,81 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Adds a preferred name to the taxon of a taxonomy identified by its taxid.
 *
 * @param tax             The taxonomy.
 * @param taxid           The taxid of the taxon.
 * @param preferred_name  The new preferred name.
 *
 * @returns 0 on success, -1 if no taxon was found for the taxid or if the
 *          preferred name could not be added.
 */
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
{
	ecotx_t* taxon;

	taxon = obi_taxo_get_taxon_with_taxid(tax, taxid);
	// The lookup yields NULL for an unknown taxid: do not hand that over to be dereferenced
	if (taxon == NULL)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError adding a preferred name: taxon not found");
		return -1;
	}

	return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Adds a preferred name to a taxon of a taxonomy.
 *
 * The name is copied in the `preferred_name` field of the taxon (replacing
 * and freeing a previous one) and a new entry of class "preferred name" is
 * appended to the preferred names index of the taxonomy, which is then sorted
 * with cmp_names().
 *
 * All allocations are done before anything is modified: on failure the taxon
 * and the taxonomy are left as they were.
 *
 * NOTE(review): when the taxon already had a preferred name, the entry added
 * for it earlier stays in the index; confirm whether it should be replaced.
 *
 * @param tax             The taxonomy.
 * @param taxon           The taxon (belonging to `tax`) to name.
 * @param preferred_name  The new preferred name.
 *
 * @returns 0 on success, -1 if an argument is NULL or if memory could not be allocated.
 */
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
{
	static const char class_name[] = "preferred name";

	econameidx_t* names_idx;
	econame_t*    name_struct;
	char*         taxon_name_copy = NULL;
	char*         index_name_copy = NULL;
	char*         class_name_copy = NULL;
	size_t        name_size;
	int32_t       count;

	if ((tax == NULL) || (taxon == NULL) || (preferred_name == NULL))
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError adding a preferred name: NULL taxonomy, taxon or name");
		return -1;
	}

	name_size = (strlen(preferred_name) + 1) * sizeof(char);
	count = (tax->preferred_names == NULL) ? 0 : (tax->preferred_names)->count;

	// Copy of the name for the taxon structure
	taxon_name_copy = (char*) malloc(name_size);
	if (taxon_name_copy == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a new preferred name for a taxon");
		goto error;
	}

	// Copy of the name for the preferred names structure
	index_name_copy = (char*) malloc(name_size);
	if (index_name_copy == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a new taxon preferred name");
		goto error;
	}

	class_name_copy = (char*) malloc(sizeof(class_name));
	if (class_name_copy == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name");
		goto error;
	}

	// Allocate or reallocate memory for the new name (realloc(NULL, ...) allocates).
	// A temporary pointer keeps the existing index reachable if this fails.
	names_idx = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * ((size_t) count + 1));
	if (names_idx == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name");
		goto error;
	}
	tax->preferred_names = names_idx;

	// Nothing can fail from here on: update the taxon...
	memcpy(taxon_name_copy, preferred_name, name_size);
	free(taxon->preferred_name);	// Free previous preferred name if there is one
	taxon->preferred_name = taxon_name_copy;

	// ... and add the new preferred name at the end of the index
	memcpy(index_name_copy, preferred_name, name_size);
	memcpy(class_name_copy, class_name, sizeof(class_name));
	name_struct = names_idx->names + count;
	name_struct->name = index_name_copy;
	name_struct->class_name = class_name_copy;
	name_struct->is_scientific_name = false;
	name_struct->taxon = taxon;

	// Update preferred name count before sorting so that the new entry is sorted too
	names_idx->count = count + 1;

	// Sort preferred names in alphabetical order
	qsort(names_idx->names, names_idx->count, sizeof(econame_t), cmp_names);

	return 0;

error:
	free(taxon_name_copy);
	free(index_name_copy);
	free(class_name_copy);
	return -1;
}
|
||||||
|
|
||||||
|
|
||||||
/////// PUBLIC /////////
|
/////// PUBLIC /////////
|
||||||
|
|
||||||
|
|
||||||
@ -2448,6 +2769,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
char* merged_idx_file_name;
|
char* merged_idx_file_name;
|
||||||
char* local_taxa_file_name;
|
char* local_taxa_file_name;
|
||||||
char* alter_names_file_name;
|
char* alter_names_file_name;
|
||||||
|
char* pref_names_file_name;
|
||||||
int buffer_size;
|
int buffer_size;
|
||||||
|
|
||||||
tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
|
tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
|
||||||
@ -2461,6 +2783,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
tax->ranks = NULL;
|
tax->ranks = NULL;
|
||||||
tax->taxa = NULL;
|
tax->taxa = NULL;
|
||||||
tax->names = NULL;
|
tax->names = NULL;
|
||||||
|
tax->preferred_names = NULL;
|
||||||
tax->merged_idx = NULL;
|
tax->merged_idx = NULL;
|
||||||
|
|
||||||
tax->dms = dms;
|
tax->dms = dms;
|
||||||
@ -2492,7 +2815,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
free(tax);
|
free(tax);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
tax->ranks = read_rankidx(ranks_file_name);
|
tax->ranks = read_ranks_idx(ranks_file_name);
|
||||||
if (tax->ranks == NULL)
|
if (tax->ranks == NULL)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
@ -2543,7 +2866,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
obi_close_taxonomy(tax);
|
obi_close_taxonomy(tax);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name);
|
tax->taxa = read_taxonomy_idx(taxa_file_name, local_taxa_file_name);
|
||||||
if (tax->taxa == NULL)
|
if (tax->taxa == NULL)
|
||||||
{
|
{
|
||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
@ -2574,7 +2897,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
obi_close_taxonomy(tax);
|
obi_close_taxonomy(tax);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
tax->merged_idx = read_mergedidx(merged_idx_file_name, tax);
|
tax->merged_idx = read_merged_idx(merged_idx_file_name, tax);
|
||||||
if (tax->merged_idx == NULL)
|
if (tax->merged_idx == NULL)
|
||||||
{
|
{
|
||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
@ -2584,6 +2907,38 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
}
|
}
|
||||||
free(merged_idx_file_name);
|
free(merged_idx_file_name);
|
||||||
|
|
||||||
|
// Read preferred names
|
||||||
|
pref_names_file_name = (char*) malloc(buffer_size*sizeof(char));
|
||||||
|
if (pref_names_file_name == NULL)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
|
obidebug(1, "\nError allocating memory for alternative names file name");
|
||||||
|
free(taxonomy_path);
|
||||||
|
obi_close_taxonomy(tax);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (snprintf(pref_names_file_name, buffer_size, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||||
|
obidebug(1, "\nError building alternative names file name");
|
||||||
|
free(taxonomy_path);
|
||||||
|
free(pref_names_file_name);
|
||||||
|
obi_close_taxonomy(tax);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
tax->preferred_names = read_preferred_names_idx(pref_names_file_name, tax);
|
||||||
|
if (obi_errno)
|
||||||
|
{
|
||||||
|
free(taxonomy_path);
|
||||||
|
free(pref_names_file_name);
|
||||||
|
obi_close_taxonomy(tax);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
free(pref_names_file_name);
|
||||||
|
|
||||||
|
if (tax->preferred_names != NULL)
|
||||||
|
fprintf(stderr, "\nPreferred names read");
|
||||||
|
|
||||||
// Read alternative names
|
// Read alternative names
|
||||||
if (read_alternative_names)
|
if (read_alternative_names)
|
||||||
{
|
{
|
||||||
@ -2605,7 +2960,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
obi_close_taxonomy(tax);
|
obi_close_taxonomy(tax);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
tax->names = read_nameidx(alter_names_file_name, tax);
|
tax->names = read_names_idx(alter_names_file_name, tax);
|
||||||
if (tax->names == NULL)
|
if (tax->names == NULL)
|
||||||
{
|
{
|
||||||
free(taxonomy_path);
|
free(taxonomy_path);
|
||||||
@ -2637,6 +2992,10 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
|||||||
if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
|
if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
// Write preferred names if there are some
|
||||||
|
if (taxonomy->preferred_names != NULL)
|
||||||
|
if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
if (taxonomy)
|
if (taxonomy)
|
||||||
{
|
{
|
||||||
|
@ -36,7 +36,8 @@ typedef struct ecotxnode {
|
|||||||
int32_t farest;
|
int32_t farest;
|
||||||
int32_t idx;
|
int32_t idx;
|
||||||
struct ecotxnode* parent;
|
struct ecotxnode* parent;
|
||||||
char* name;
|
char* name; // scientific name
|
||||||
|
char* preferred_name; // preferred name
|
||||||
bool local;
|
bool local;
|
||||||
} ecotx_t;
|
} ecotx_t;
|
||||||
|
|
||||||
@ -98,6 +99,7 @@ typedef struct OBIDMS_taxonomy_t {
|
|||||||
ecomergedidx_t* merged_idx;
|
ecomergedidx_t* merged_idx;
|
||||||
ecorankidx_t* ranks;
|
ecorankidx_t* ranks;
|
||||||
econameidx_t* names;
|
econameidx_t* names;
|
||||||
|
econameidx_t* preferred_names;
|
||||||
ecotxidx_t* taxa;
|
ecotxidx_t* taxa;
|
||||||
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
||||||
|
|
||||||
@ -127,4 +129,11 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
|
|||||||
|
|
||||||
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
||||||
|
|
||||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
||||||
|
|
||||||
|
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
|
||||||
|
|
||||||
|
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user