Added C functions to write .rdx, .tdx, .ndx binary taxonomy files from a taxonomy C structure
This commit is contained in:
Celine Mercier
2016-10-14 17:03:10 +02:00
parent 0dfd67ec89
commit b63d0fb9fb
7 changed files with 516 additions and 44 deletions

View File

@ -1,14 +1,17 @@
#cython: language_level=3 #cython: language_level=3
from .capi.obitaxonomy cimport ecotx_t, OBIDMS_taxonomy_p from .capi.obitaxonomy cimport ecotx_t, OBIDMS_taxonomy_p
from ._obidms cimport OBIDMS
cdef class OBI_Taxonomy : cdef class OBI_Taxonomy :
cdef str _name cdef str _name
cdef OBIDMS_taxonomy_p _pointer cdef OBIDMS_taxonomy_p _pointer
cdef OBIDMS _dms
cpdef close(self) cpdef close(self)
cpdef _write(self, str prefix)
cdef class OBI_Taxon : cdef class OBI_Taxon :

View File

@ -4,7 +4,10 @@ from obitools3.utils cimport bytes2str, str2bytes
from .capi.obitaxonomy cimport obi_read_taxonomy, \ from .capi.obitaxonomy cimport obi_read_taxonomy, \
obi_close_taxonomy, \ obi_close_taxonomy, \
obi_taxo_get_taxon_with_taxid obi_taxo_get_taxon_with_taxid, \
write_rankidx, \
write_taxonomyidx, \
write_nameidx
from ._obidms cimport OBIDMS from ._obidms cimport OBIDMS
@ -18,6 +21,7 @@ cdef class OBI_Taxonomy :
def __init__(self, OBIDMS dms, str name) : def __init__(self, OBIDMS dms, str name) :
self._dms = dms
self._name = name self._name = name
self._pointer = obi_read_taxonomy(dms._pointer, str2bytes(name), True) # TODO discuss self._pointer = obi_read_taxonomy(dms._pointer, str2bytes(name), True) # TODO discuss
# TODO if not found in DMS, try to import? # TODO if not found in DMS, try to import?
@ -39,7 +43,16 @@ cdef class OBI_Taxonomy :
cpdef close(self) : cpdef close(self) :
if (obi_close_taxonomy(self._pointer) < 0) : if (obi_close_taxonomy(self._pointer) < 0) :
raise Exception("Error closing the taxonomy") raise Exception("Error closing the taxonomy")
cpdef _write(self, str prefix) :
    """
    Write the taxonomy held by this object to its binary files in the DMS.

    Three files are written in turn through the C layer: the rank index
    (write_rankidx), the taxa index (write_taxonomyidx) and the names
    index (write_nameidx).

    prefix: passed to the C functions as the taxonomy name, which they use
            to build the taxonomy directory path and the file names.

    Raises Exception as soon as one of the C functions returns a negative
    value; the files written before the failure are left in place.
    """
    # Encode once; the same bytes object is handed to the three C calls
    prefix_b = str2bytes(prefix)
    if (write_rankidx(self._dms._pointer, self._pointer, prefix_b) < 0) :
        raise Exception("Error writing the taxonomy rank file")
    if (write_taxonomyidx(self._dms._pointer, self._pointer, prefix_b) < 0) :
        raise Exception("Error writing the taxonomy taxa file")
    if (write_nameidx(self._dms._pointer, self._pointer, prefix_b) < 0) :
        # This message used to be a copy of the taxa one, hiding which file failed
        raise Exception("Error writing the taxonomy names file")
cdef class OBI_Taxon : # TODO dict subclass? cdef class OBI_Taxon : # TODO dict subclass?
@ -82,6 +95,6 @@ cdef class OBI_Taxon : # TODO dict subclass?
d['parent'] = self.parent.taxid d['parent'] = self.parent.taxid
d['farest'] = self.farest d['farest'] = self.farest
return str(d) return str(d)

View File

@ -40,3 +40,7 @@ cdef extern from "obidms_taxonomy.h" nogil:
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const_char_p taxonomy_name)

View File

@ -497,6 +497,33 @@ OBIDMS_p obi_open_dms(const char* dms_path)
return NULL; return NULL;
} }
// Open the taxonomy directory
dms->tax_directory = opendir_in_dms(dms, TAXONOMY_DIR_NAME);
if (dms->tax_directory == NULL)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError opening the taxonomy directory");
closedir(dms->indexer_directory);
closedir(dms->view_directory);
closedir(dms->directory);
free(dms);
return NULL;
}
// Store the taxonomy directory's file descriptor
dms->tax_dir_fd = dirfd(dms->tax_directory);
if (dms->tax_dir_fd < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError getting the file descriptor of the taxonomy directory");
closedir(dms->indexer_directory);
closedir(dms->tax_directory);
closedir(dms->view_directory);
closedir(dms->directory);
free(dms);
return NULL;
}
// Initialize the list of opened columns // Initialize the list of opened columns
dms->opened_columns = (Opened_columns_list_p) malloc(sizeof(Opened_columns_list_t)); dms->opened_columns = (Opened_columns_list_p) malloc(sizeof(Opened_columns_list_t));
(dms->opened_columns)->nb_opened_columns = 0; (dms->opened_columns)->nb_opened_columns = 0;
@ -536,7 +563,7 @@ int obi_close_dms(OBIDMS_p dms)
while ((dms->opened_columns)->nb_opened_columns > 0) while ((dms->opened_columns)->nb_opened_columns > 0)
obi_close_column(*((dms->opened_columns)->columns)); obi_close_column(*((dms->opened_columns)->columns));
// Close dms, and view and indexer directories // Close dms, and view, indexer and taxonomy directories
if (closedir(dms->indexer_directory) < 0) if (closedir(dms->indexer_directory) < 0)
{ {
obi_set_errno(OBI_INDEXER_ERROR); obi_set_errno(OBI_INDEXER_ERROR);
@ -551,6 +578,13 @@ int obi_close_dms(OBIDMS_p dms)
free(dms); free(dms);
return -1; return -1;
} }
if (closedir(dms->tax_directory) < 0)
{
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError closing a taxonomy directory");
free(dms);
return -1;
}
if (closedir(dms->directory) < 0) if (closedir(dms->directory) < 0)
{ {
obi_set_errno(OBIDMS_MEMORY_ERROR); obi_set_errno(OBIDMS_MEMORY_ERROR);

View File

@ -106,6 +106,12 @@ typedef struct OBIDMS {
int view_dir_fd; /**< The file descriptor of the directory entry int view_dir_fd; /**< The file descriptor of the directory entry
* usable to refer and scan the view directory. * usable to refer and scan the view directory.
*/ */
DIR* tax_directory; /**< A directory entry usable to
* refer and scan the taxonomy directory.
*/
int tax_dir_fd; /**< The file descriptor of the directory entry
* usable to refer and scan the taxonomy directory.
*/
bool little_endian; /**< Endianness of the database. bool little_endian; /**< Endianness of the database.
*/ */
Opened_columns_list_p opened_columns; /**< List of opened columns. Opened_columns_list_p opened_columns; /**< List of opened columns.

View File

@ -51,6 +51,27 @@ int compareRankLabel(const void *label1, const void *label2)
} }
/*
 * Builds the path of the directory of one taxonomy inside a DMS:
 * <full path of TAXONOMY_DIR_NAME in the DMS>/<tax_name>.
 *
 * @param dms       The DMS containing the taxonomy directory.
 * @param tax_name  The name of the taxonomy.
 *
 * @returns A newly allocated, NUL-terminated path that the caller must free,
 *          or NULL if the path could not be built (the OBI errno is then set
 *          to OBI_TAXONOMY_ERROR).
 */
char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name)
{
    char*  all_tax_dir_path;
    char*  tax_path;
    size_t tax_path_size;
    int    written;

    all_tax_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME);
    // NOTE(review): obi_dms_get_full_path is assumed to return NULL on failure - confirm
    if (all_tax_dir_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building taxonomy path");
        return NULL;
    }

    // Directory path + '/' + taxonomy name + terminating NUL
    tax_path_size = strlen(all_tax_dir_path) + strlen(tax_name) + 2;
    tax_path = (char*) malloc(tax_path_size*sizeof(char));
    if (tax_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building taxonomy path");
        free(all_tax_dir_path);
        return NULL;
    }

    written = snprintf(tax_path, tax_path_size, "%s/%s", all_tax_dir_path, tax_name);
    free(all_tax_dir_path);
    if ((written < 0) || (((size_t) written) >= tax_path_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building taxonomy path");
        free(tax_path);     // was leaked on this path
        return NULL;
    }

    return tax_path;
}
int32_t rank_index(const char* label, ecorankidx_t* ranks) int32_t rank_index(const char* label, ecorankidx_t* ranks)
{ {
char **rep; char **rep;
@ -58,7 +79,7 @@ int32_t rank_index(const char* label, ecorankidx_t* ranks)
rep = bsearch(label, ranks->label, ranks->count, sizeof(char*), compareRankLabel); rep = bsearch(label, ranks->label, ranks->count, sizeof(char*), compareRankLabel);
if (rep) if (rep)
return rep-ranks->label; // TODO what??? return rep-ranks->label;
return -1; return -1;
} }
@ -93,8 +114,8 @@ void* read_ecorecord(FILE* f, int32_t* record_size)
} }
// if (!(obi_is_little_endian())) // TODO // if (!(obi_is_little_endian())) // TODO
if (is_big_endian()) // if (is_big_endian())
*record_size=swap_int32_t(*record_size); // *record_size=swap_int32_t(*record_size);
if (buffer_size < *record_size) if (buffer_size < *record_size)
{ {
@ -137,13 +158,13 @@ ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon)
return NULL; return NULL;
// if (!(obi_is_little_endian())) // TODO // if (!(obi_is_little_endian())) // TODO
if (is_big_endian()) // if (is_big_endian())
{ // {
raw->name_length = swap_int32_t(raw->name_length); // raw->name_length = swap_int32_t(raw->name_length);
raw->parent = swap_int32_t(raw->parent); // raw->parent = swap_int32_t(raw->parent);
raw->rank = swap_int32_t(raw->rank); // raw->rank = swap_int32_t(raw->rank);
raw->taxid = swap_int32_t(raw->taxid); // raw->taxid = swap_int32_t(raw->taxid);
} // }
taxon->parent = (ecotx_t*) ((size_t) raw->parent); taxon->parent = (ecotx_t*) ((size_t) raw->parent);
taxon->taxid = raw->taxid; taxon->taxid = raw->taxid;
@ -195,8 +216,8 @@ FILE* open_ecorecorddb(const char* file_name,
} }
// if (!(obi_is_little_endian())) // TODO // if (!(obi_is_little_endian())) // TODO
if (is_big_endian()) // if (is_big_endian())
*count = swap_int32_t(*count); // *count = swap_int32_t(*count);
return f; return f;
} }
@ -225,6 +246,7 @@ ecorankidx_t* read_rankidx(const char* ranks_file_name)
buffer = read_ecorecord(ranks_file, &rank_length); buffer = read_ecorecord(ranks_file, &rank_length);
ranks_index->label[i] = (char*) malloc(rank_length+1); ranks_index->label[i] = (char*) malloc(rank_length+1);
strncpy(ranks_index->label[i], buffer, rank_length); strncpy(ranks_index->label[i], buffer, rank_length);
(ranks_index->label[i])[rank_length] = 0;
} }
return ranks_index; return ranks_index;
@ -277,6 +299,7 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
for (; i < count_taxa; i++){ for (; i < count_taxa; i++){
readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i])); readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i]));
taxa_index->taxon[i].idx = i;
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
taxa_index->taxon[i].parent->farest=0; taxa_index->taxon[i].parent->farest=0;
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
@ -321,13 +344,13 @@ econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy
return NULL; return NULL;
// if (!(obi_is_little_endian())) // TODO // if (!(obi_is_little_endian())) // TODO
if (is_big_endian()) // if (is_big_endian())
{ // {
raw->is_scientific_name = swap_int32_t(raw->is_scientific_name); // raw->is_scientific_name = swap_int32_t(raw->is_scientific_name);
raw->name_length = swap_int32_t(raw->name_length); // raw->name_length = swap_int32_t(raw->name_length);
raw->class_length = swap_int32_t(raw->class_length); // raw->class_length = swap_int32_t(raw->class_length);
raw->taxid = swap_int32_t(raw->taxid); // raw->taxid = swap_int32_t(raw->taxid);
} // }
name->is_scientific_name = raw->is_scientific_name; name->is_scientific_name = raw->is_scientific_name;
@ -382,7 +405,6 @@ static int bcomptaxon (const void* ptaxid, const void* ptaxon)
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names) OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names)
{ {
OBIDMS_taxonomy_p tax; OBIDMS_taxonomy_p tax;
char* main_taxonomy_dir_path;
char* taxonomy_path; char* taxonomy_path;
char* ranks_file_name; char* ranks_file_name;
char* taxa_file_name; char* taxa_file_name;
@ -398,16 +420,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
buffer_size = 2048; // TODO buffer_size = 2048; // TODO
main_taxonomy_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME); taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
taxonomy_path = (char*) malloc((strlen(main_taxonomy_dir_path) + strlen(taxonomy_name) + strlen(taxonomy_name) + 3)*sizeof(char));
if (sprintf(taxonomy_path, "%s/%s/%s", main_taxonomy_dir_path, taxonomy_name, taxonomy_name) < 0)
{
free(main_taxonomy_dir_path);
obi_close_taxonomy(tax);
return NULL;
}
free(main_taxonomy_dir_path);
// Read ranks // Read ranks
ranks_file_name = (char*) malloc(buffer_size*sizeof(char)); ranks_file_name = (char*) malloc(buffer_size*sizeof(char));
@ -417,7 +430,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
obi_close_taxonomy(tax); obi_close_taxonomy(tax);
return NULL; return NULL;
} }
if (snprintf(ranks_file_name, buffer_size, "%s.rdx", taxonomy_path) < 0) if (snprintf(ranks_file_name, buffer_size, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0)
{ {
free(taxonomy_path); free(taxonomy_path);
free(ranks_file_name); free(ranks_file_name);
@ -441,7 +454,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
obi_close_taxonomy(tax); obi_close_taxonomy(tax);
return NULL; return NULL;
} }
if (snprintf(taxa_file_name, buffer_size,"%s.tdx", taxonomy_path) < 0) if (snprintf(taxa_file_name, buffer_size, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0)
{ {
free(taxonomy_path); free(taxonomy_path);
free(taxa_file_name); free(taxa_file_name);
@ -456,7 +469,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
obi_close_taxonomy(tax); obi_close_taxonomy(tax);
return NULL; return NULL;
} }
if (snprintf(local_taxa_file_name, buffer_size,"%s.ldx", taxonomy_path) < 0) if (snprintf(local_taxa_file_name, buffer_size, "%s/%s.ldx", taxonomy_path, taxonomy_name) < 0)
{ {
free(taxonomy_path); free(taxonomy_path);
free(taxa_file_name); free(taxa_file_name);
@ -486,7 +499,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
obi_close_taxonomy(tax); obi_close_taxonomy(tax);
return NULL; return NULL;
} }
if (snprintf(alter_names_file_name, buffer_size,"%s.ndx", taxonomy_path) < 0) if (snprintf(alter_names_file_name, buffer_size, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0)
{ {
free(taxonomy_path); free(taxonomy_path);
free(alter_names_file_name); free(alter_names_file_name);
@ -514,7 +527,7 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
if (taxonomy) if (taxonomy)
{ {
if (taxonomy->ranks) if (taxonomy->ranks)
free(taxonomy->ranks); // TODO those don't free everything but mapping will replace anyway free(taxonomy->ranks); // TODO those don't free everything
if (taxonomy->names) if (taxonomy->names)
free(taxonomy->names); free(taxonomy->names);
@ -527,7 +540,7 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
return 0; return 0;
} }
// TODO no closing files? // close files
return 1; return 1;
} }
@ -699,3 +712,395 @@ ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
return obi_taxo_get_parent_at_rank(taxon, rankindex); return obi_taxo_get_parent_at_rank(taxon, rankindex);
} }
// Functions to write taxonomy structure to binary files
/*
 * Writes the rank index of a taxonomy to the binary file
 * <taxonomy directory>/<taxonomy_name>.rdx.
 *
 * File layout, as written below (integers are raw int32_t copied from memory):
 *   - the number of ranks,
 *   - for each rank: the length of its label, then the label bytes
 *     (without terminating NUL).
 *
 * The file is created with O_EXCL, so the call fails if it already exists.
 *
 * @param dms            The DMS in which the taxonomy directory is located.
 * @param tax            The taxonomy structure whose ranks are written.
 * @param taxonomy_name  The name of the taxonomy (directory name and file prefix).
 *
 * @returns 0 on success, -1 on error (the OBI errno is set).
 */
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)   // TODO prefix in taxonomy struct?
{
    int     i;
    int     written;
    char*   file_name;
    size_t  file_name_size;
    int     file_descriptor;
    off_t   file_size;
    char*   taxonomy_path;
    int32_t length;

    if ((tax == NULL) || (tax->ranks == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: no rank index to write in a binary taxonomy file");
        return -1;
    }

    // Compute file size
    file_size = sizeof(int32_t);    // To store rank count
    for (i=0; i < (tax->ranks)->count; i++)
    {
        file_size = file_size + sizeof(int32_t);                    // To store label size
        file_size = file_size + strlen(((tax->ranks)->label)[i]);   // To store label
    }

    // Build the taxonomy directory path
    taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
    if (taxonomy_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        return -1;
    }

    // Directory path + '/' + taxonomy name + ".rdx" + terminating NUL:
    // 6 extra bytes are needed (5 overflowed the buffer by one byte)
    file_name_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
    file_name = (char*) malloc(file_name_size*sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
        free(taxonomy_path);
        return -1;
    }

    // Build the file path
    written = snprintf(file_name, file_name_size, "%s/%s.rdx", taxonomy_path, taxonomy_name);
    free(taxonomy_path);
    if ((written < 0) || (((size_t) written) >= file_name_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        free(file_name);
        return -1;
    }

    // Create file
    file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (file_descriptor < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError creating a binary taxonomy file");
        free(file_name);
        return -1;
    }
    free(file_name);

    // Truncate the file to the right size
    if (ftruncate(file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError truncating a binary taxonomy file");
        close(file_descriptor);
        return -1;
    }

    // Write rank count
    if (write(file_descriptor, &((tax->ranks)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
        goto write_error;

    // Write ranks
    for (i=0; i < (tax->ranks)->count; i++)
    {
        length = strlen(((tax->ranks)->label)[i]);

        // Write rank size
        if (write(file_descriptor, &length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write rank label
        if (write(file_descriptor, ((tax->ranks)->label)[i], length) < ((ssize_t) length))
            goto write_error;
    }

    // Close file
    if (close(file_descriptor) < 0)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a binary taxonomy file");
        return -1;
    }

    return 0;

write_error:
    // Any failed or short write is fatal: report it and release the descriptor
    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError writing in a binary taxonomy file");
    close(file_descriptor);
    return -1;
}
/*
 * Writes the taxa index of a taxonomy to the binary file
 * <taxonomy directory>/<taxonomy_name>.tdx.
 *
 * File layout, as written below (integers are raw int32_t copied from memory):
 *   - the number of taxa,
 *   - for each taxon: the record size (size of what follows it), the taxid,
 *     the rank index, the index of the parent taxon, the name length, then
 *     the name bytes (without terminating NUL).
 *
 * The file is created with O_EXCL, so the call fails if it already exists.
 *
 * @param dms            The DMS in which the taxonomy directory is located.
 * @param tax            The taxonomy structure whose taxa are written.
 * @param taxonomy_name  The name of the taxonomy (directory name and file prefix).
 *
 * @returns 0 on success, -1 on error (the OBI errno is set).
 */
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)   // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
    int     i;
    int     written;
    char*   file_name;
    size_t  file_name_size;
    int     file_descriptor;
    off_t   file_size;
    char*   taxonomy_path;
    int32_t name_length;
    int32_t record_size;

    if ((tax == NULL) || (tax->taxa == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: no taxa index to write in a binary taxonomy file");
        return -1;
    }

    // Compute file size
    file_size = sizeof(int32_t);    // To store record count
    for (i=0; i < (tax->taxa)->count; i++)
    {
        file_size = file_size + sizeof(int32_t) * 5;                // To store record size, taxid, rank index, parent index, and name length
        file_size = file_size + strlen(tax->taxa->taxon[i].name);   // To store name
    }

    // Build the taxonomy directory path
    taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
    if (taxonomy_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        return -1;
    }

    // Directory path + '/' + taxonomy name + ".tdx" + terminating NUL:
    // 6 extra bytes are needed (5 overflowed the buffer by one byte)
    file_name_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
    file_name = (char*) malloc(file_name_size*sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
        free(taxonomy_path);
        return -1;
    }

    // Build the file path
    written = snprintf(file_name, file_name_size, "%s/%s.tdx", taxonomy_path, taxonomy_name);
    free(taxonomy_path);
    if ((written < 0) || (((size_t) written) >= file_name_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        free(file_name);
        return -1;
    }

    // Create file
    file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (file_descriptor < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError creating a binary taxonomy file");
        free(file_name);
        return -1;
    }
    free(file_name);

    // Truncate the file to the right size
    if (ftruncate(file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError truncating a binary taxonomy file");
        close(file_descriptor);
        return -1;
    }

    // Write record count
    if (write(file_descriptor, &(tax->taxa->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
        goto write_error;

    // Write records
    for (i=0; i < tax->taxa->count; i++)
    {
        name_length = strlen(tax->taxa->taxon[i].name);
        // The record size counts the 4 integers and the name that follow it, not itself
        record_size = 4*sizeof(int32_t) + name_length;

        // Write record size
        if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write taxid
        if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write rank index
        if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write parent index
        if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write name length
        if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write name
        if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length))
            goto write_error;
    }

    // Close file
    if (close(file_descriptor) < 0)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a binary taxonomy file");
        return -1;
    }

    return 0;

write_error:
    // Any failed or short write is fatal: report it and release the descriptor
    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError writing in a binary taxonomy file");
    close(file_descriptor);
    return -1;
}
/*
 * Writes the names index of a taxonomy to the binary file
 * <taxonomy directory>/<taxonomy_name>.ndx.
 *
 * File layout, as written below (integers are raw int32_t copied from memory):
 *   - the number of names,
 *   - for each name: the record size (size of what follows it), the
 *     is_scientific_name flag, the name length, the class length, the index
 *     of the associated taxon, then the name bytes and the class name bytes
 *     (both without terminating NUL).
 *
 * The file is created with O_EXCL, so the call fails if it already exists.
 *
 * @param dms            The DMS in which the taxonomy directory is located.
 * @param tax            The taxonomy structure whose names are written.
 * @param taxonomy_name  The name of the taxonomy (directory name and file prefix).
 *
 * @returns 0 on success, -1 on error (the OBI errno is set), including when
 *          the taxonomy holds no names index.
 */
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)   // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
    int     i;
    int     written;
    char*   file_name;
    size_t  file_name_size;
    int     file_descriptor;
    off_t   file_size;
    char*   taxonomy_path;
    int32_t name_length;
    int32_t class_length;
    int32_t record_size;

    // The names index is optional in a taxonomy structure and may be missing
    if ((tax == NULL) || (tax->names == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: no names index to write in a binary taxonomy file");
        return -1;
    }

    // Compute file size
    file_size = sizeof(int32_t);    // To store record count
    for (i=0; i < (tax->names)->count; i++)
    {
        file_size = file_size + sizeof(int32_t) * 5;                        // To store record size, scientific name flag, name length, class length, and taxon index
        file_size = file_size + strlen(tax->names->names[i].name);          // To store name
        file_size = file_size + strlen(tax->names->names[i].class_name);    // To store class name
    }

    // Build the taxonomy directory path
    taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
    if (taxonomy_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        return -1;
    }

    // Directory path + '/' + taxonomy name + ".ndx" + terminating NUL:
    // 6 extra bytes are needed (5 overflowed the buffer by one byte)
    file_name_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
    file_name = (char*) malloc(file_name_size*sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
        free(taxonomy_path);
        return -1;
    }

    // Build the file path
    written = snprintf(file_name, file_name_size, "%s/%s.ndx", taxonomy_path, taxonomy_name);
    free(taxonomy_path);
    if ((written < 0) || (((size_t) written) >= file_name_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        free(file_name);
        return -1;
    }

    // Create file
    file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (file_descriptor < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError creating a binary taxonomy file");
        free(file_name);
        return -1;
    }
    free(file_name);

    // Truncate the file to the right size
    if (ftruncate(file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError truncating a binary taxonomy file");
        close(file_descriptor);
        return -1;
    }

    // Write record count
    if (write(file_descriptor, &(tax->names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
        goto write_error;

    // Write records
    for (i=0; i < tax->names->count; i++)
    {
        name_length = strlen(tax->names->names[i].name);
        class_length = strlen(tax->names->names[i].class_name);
        // The record size counts the 4 integers and the two strings that follow it, not itself
        record_size = 4*sizeof(int32_t) + name_length + class_length;

        // Write record size
        if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write if the name is a scientific name
        if (write(file_descriptor, &(tax->names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write name length
        if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write class length
        if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write taxid index
        if (write(file_descriptor, &(tax->names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
            goto write_error;

        // Write name
        if (write(file_descriptor, tax->names->names[i].name, name_length) < ((ssize_t) name_length))
            goto write_error;

        // Write class
        if (write(file_descriptor, tax->names->names[i].class_name, class_length) < ((ssize_t) class_length))
            goto write_error;
    }

    // Close file
    if (close(file_descriptor) < 0)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a binary taxonomy file");
        return -1;
    }

    return 0;

write_error:
    // Any failed or short write is fatal: report it and release the descriptor
    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError writing in a binary taxonomy file");
    close(file_descriptor);
    return -1;
}

View File

@ -1,5 +1,5 @@
/******************************************************************** /********************************************************************
* OBIDMS taxonomy headeer file * * OBIDMS taxonomy header file *
********************************************************************/ ********************************************************************/
/** /**
@ -34,6 +34,7 @@ typedef struct ecotxnode {
int32_t taxid; int32_t taxid;
int32_t rank; int32_t rank;
int32_t farest; int32_t farest;
int32_t idx;
struct ecotxnode* parent; struct ecotxnode* parent;
char* name; char* name;
} ecotx_t; } ecotx_t;
@ -54,10 +55,10 @@ typedef struct {
typedef struct { typedef struct {
int32_t is_scientific_name; int32_t is_scientific_name;
int32_t name_length; int32_t name_length;
int32_t class_length; int32_t class_length;
int32_t taxid; int32_t taxid; // taxid idx
char names[1]; char names[1];
} econameformat_t; } econameformat_t;
@ -103,3 +104,9 @@ ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy); ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);