/******************************************************************** * OBIDMS taxonomy functions * ********************************************************************/ /** * @file obidms_taxonomy.c * @author Celine Mercier (celine.mercier@metabarcoding.org) * @date March 2nd 2016 * @brief Functions for reading binary taxonomy files. */ #include #include #include #include #include #include "obidms_taxonomy.h" #include "obidms.h" #include "obilittlebigman.h" // TODO the function from this checking the endianness does not seem to work properly #include "obidebug.h" #include "obierrno.h" #include "utils.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) // TODO : the malloc aren't checked but shouldn't exist for long because mapping instead // error checking and file closing in general aren't done properly yet // The endianness eventually shouldn't need checking too, as the machine will write the taxonomy with its endianness. int32_t is_big_endian() { int32_t i=1; return (int32_t)((char*)&i)[0]; } int32_t swap_int32_t(int32_t i) { return SWAPINT32(i); } int compareRankLabel(const void *label1, const void *label2) { return strcmp((const char*)label1,*(const char**)label2); } char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name) { char* all_tax_dir_path; char* tax_path; all_tax_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME); tax_path = (char*) malloc((strlen(all_tax_dir_path) + strlen(tax_name) + 2)*sizeof(char)); if (sprintf(tax_path, "%s/%s", all_tax_dir_path, tax_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building taxonomy path"); free(all_tax_dir_path); return NULL; } free(all_tax_dir_path); return tax_path; } int32_t rank_index(const char* label, ecorankidx_t* ranks) { char **rep; rep = bsearch(label, ranks->label, ranks->count, sizeof(char*), compareRankLabel); if (rep) return rep-ranks->label; return -1; } void* read_ecorecord(FILE* f, int32_t* record_size) { static void* buffer = NULL; int32_t buffer_size = 0; int32_t read; if (!record_size) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: record_size can not be NULL"); return NULL; } read = fread(record_size, sizeof(int32_t), 1, f); if (feof(f)) return NULL; if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: error reading record size"); return NULL; } // if (!(obi_is_little_endian())) // TODO // if (is_big_endian()) // *record_size=swap_int32_t(*record_size); if (buffer_size < *record_size) { if (buffer) buffer = realloc(buffer, *record_size); else buffer = malloc(*record_size); if (buffer == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: error allocating memory"); return NULL; } } read = fread(buffer, *record_size, 1, f); if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: error reading a record %d, %d", read, *record_size); return NULL; } return buffer; }; ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon) { ecotxformat_t* raw; int32_t record_length; raw = read_ecorecord(f, &record_length); if (!raw) return NULL; // if (!(obi_is_little_endian())) // TODO // if (is_big_endian()) // { // raw->name_length = swap_int32_t(raw->name_length); // raw->parent = swap_int32_t(raw->parent); // raw->rank = swap_int32_t(raw->rank); // raw->taxid = swap_int32_t(raw->taxid); // } taxon->parent = (ecotx_t*) ((size_t) raw->parent); taxon->taxid = raw->taxid; taxon->rank = raw->rank; taxon->farest = -1; taxon->name = malloc((raw->name_length+1) * sizeof(char)); strncpy(taxon->name, raw->name, raw->name_length); return taxon; } FILE* open_ecorecorddb(const char* file_name, int32_t* count, int32_t abort_on_open_error) { FILE* f; int32_t read; f = fopen(file_name, "rb"); if (!f) { if (abort_on_open_error) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nCouldn't open a taxonomy file"); return NULL; } else { *count = 0; return NULL; } } read = fread(count, sizeof(int32_t), 1, f); if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy record size"); return NULL; } // if (!(obi_is_little_endian())) // TODO // if (is_big_endian()) // *count = swap_int32_t(*count); return f; } ecorankidx_t* read_rankidx(const char* ranks_file_name) { int32_t count; FILE* ranks_file; ecorankidx_t* ranks_index; int32_t i; int32_t rank_length; char* buffer; ranks_file = open_ecorecorddb(ranks_file_name, &count, 0); if (ranks_file==NULL) return NULL; ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * (count-1)); ranks_index->count = count; for (i=0; i < count; i++) { buffer = read_ecorecord(ranks_file, &rank_length); ranks_index->label[i] = (char*) malloc(rank_length+1); strncpy(ranks_index->label[i], buffer, rank_length); (ranks_index->label[i])[rank_length] = 0; } return ranks_index; } ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name) { int32_t count_taxa; int32_t count_local_taxa; FILE* f_taxa; FILE* f_local_taxa; ecotxidx_t* taxa_index; struct ecotxnode* t; int32_t i; int32_t j; f_taxa = open_ecorecorddb(taxa_file_name, &count_taxa,0); if (f_taxa == NULL) { obidebug(1, "\nError reading taxonomy taxa file"); return NULL; } f_local_taxa = open_ecorecorddb(local_taxa_file_name, &count_local_taxa, 0); taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa - 1)); taxa_index->count = count_taxa + count_local_taxa; taxa_index->buffer_size = taxa_index->count; taxa_index->max_taxid = 0; printf("Reading %d taxa...\n", count_taxa); for (i=0; itaxon[i])); taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent->farest = 0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) taxa_index->max_taxid = taxa_index->taxon[i].taxid; } if (count_local_taxa > 0) printf("Reading %d local taxa...\n", count_local_taxa); else printf("No local taxa\n"); count_taxa = taxa_index->count; for (; i < count_taxa; i++){ readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i])); taxa_index->taxon[i].idx = i; taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent->farest=0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) taxa_index->max_taxid = taxa_index->taxon[i].taxid; } printf("Computing longest branches...\n"); for (i=0; i < count_taxa; i++) { t = taxa_index->taxon+i; if (t->farest == -1) { t->farest=0; while (t->parent != t) { j = t->farest + 1; if (j > t->parent->farest) { t->parent->farest = j; t=t->parent; } else t = taxa_index->taxon; } } } return taxa_index; } econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy) { econameformat_t* raw; int32_t record_length; raw = read_ecorecord(f, &record_length); if (!raw) return NULL; // if (!(obi_is_little_endian())) // TODO // if (is_big_endian()) // { // raw->is_scientific_name = swap_int32_t(raw->is_scientific_name); // raw->name_length = swap_int32_t(raw->name_length); // raw->class_length = swap_int32_t(raw->class_length); // raw->taxid = swap_int32_t(raw->taxid); // } name->is_scientific_name = raw->is_scientific_name; name->name = malloc((raw->name_length + 1) * sizeof(char)); strncpy(name->name, raw->names, raw->name_length); name->name[raw->name_length] = 0; name->class_name = malloc((raw->class_length+1) * sizeof(char)); strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length); name->class_name[raw->class_length] = 0; name->taxon = taxonomy->taxa->taxon + raw->taxid; return name; } econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy) { int32_t count; FILE* f; econameidx_t* index_names; int32_t i; f = open_ecorecorddb(file_name, &count, 0); if (f == NULL) return NULL; index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * (count-1)); index_names->count = count; for (i=0; i < count; i++) readnext_econame(f, (index_names->names)+i, taxonomy); return index_names; } static int bcomptaxon (const void* ptaxid, const void* ptaxon) { ecotx_t* current_taxon = (ecotx_t*) ptaxon; int32_t taxid = (int32_t) ((size_t) ptaxid); return taxid - current_taxon->taxid; } /////// PUBLIC ///////// OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names) { OBIDMS_taxonomy_p tax; char* taxonomy_path; char* ranks_file_name; char* taxa_file_name; char* local_taxa_file_name; char* alter_names_file_name; int buffer_size; tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t)); tax->ranks = NULL; tax->taxa = NULL; tax->names = NULL; buffer_size = 2048; // TODO taxonomy_path = get_taxonomy_path(dms, taxonomy_name); // Read ranks ranks_file_name = (char*) malloc(buffer_size*sizeof(char)); if (ranks_file_name == NULL) { free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(ranks_file_name, buffer_size, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0) { free(taxonomy_path); free(ranks_file_name); obi_close_taxonomy(tax); return NULL; } tax->ranks = read_rankidx(ranks_file_name); if (tax->ranks == NULL) { free(ranks_file_name); obi_close_taxonomy(tax); return NULL; } free(ranks_file_name); // Read taxa taxa_file_name = (char*) malloc(buffer_size*sizeof(char)); if (taxa_file_name == NULL) { free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(taxa_file_name, buffer_size, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0) { free(taxonomy_path); free(taxa_file_name); obi_close_taxonomy(tax); return NULL; } local_taxa_file_name = (char*) malloc(buffer_size*sizeof(char)); if (local_taxa_file_name == NULL) { free(taxonomy_path); free(taxa_file_name); obi_close_taxonomy(tax); return NULL; } if (snprintf(local_taxa_file_name, buffer_size, "%s/%s.ldx", taxonomy_path, taxonomy_name) < 0) { free(taxonomy_path); free(taxa_file_name); free(local_taxa_file_name); obi_close_taxonomy(tax); return NULL; } tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name); if (tax->taxa == NULL) { free(taxonomy_path); free(taxa_file_name); free(local_taxa_file_name); obi_close_taxonomy(tax); return NULL; } free(taxa_file_name); free(local_taxa_file_name); // Read alternative names if (read_alternative_names) { alter_names_file_name = (char*) malloc(buffer_size*sizeof(char)); if (alter_names_file_name == NULL) { free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(alter_names_file_name, buffer_size, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0) { free(taxonomy_path); free(alter_names_file_name); obi_close_taxonomy(tax); return NULL; } tax->names = read_nameidx(alter_names_file_name, tax); if (tax->names == NULL) { free(alter_names_file_name); obi_close_taxonomy(tax); return NULL; } free(alter_names_file_name); } free(taxonomy_path); return tax; } int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy) { if (taxonomy) { if (taxonomy->ranks) free(taxonomy->ranks); // TODO those don't free everything if (taxonomy->names) free(taxonomy->names); if (taxonomy->taxa) free(taxonomy->taxa); free(taxonomy); return 0; } // close files return 1; } ////////////////////////////////////////////////////////////////////////// ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx) { ecotx_t* current_taxon; ecotx_t* next_taxon; current_taxon = taxon; next_taxon = current_taxon->parent; while ((current_taxon != next_taxon) && // root node (current_taxon->rank != rankidx)) { current_taxon = next_taxon; next_taxon = current_taxon->parent; } if (current_taxon->rank == rankidx) return current_taxon; else return NULL; } ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid) { ecotx_t *current_taxon; int32_t count; count = taxonomy->taxa->count; current_taxon = (ecotx_t*) bsearch((const void *) ((size_t) taxid), (const void *) taxonomy->taxa->taxon, count, sizeof(ecotx_t), bcomptaxon); return current_taxon; } bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid) { ecotx_t* next_parent; next_parent = taxon->parent; while ((other_taxid != next_parent->taxid) && (strcmp(next_parent->name, "root"))) next_parent = next_parent->parent; if (other_taxid == next_parent->taxid) return 1; else return 0; } ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("species", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the species associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("genus", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the genus associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("family", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the family associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("kingdom", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the kingdom associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("superkingdom", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the superkingdom associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } // Functions to write taxonomy structure to binary files int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t length; // Compute file size file_size = sizeof(int32_t); for (i=0; i < (tax->ranks)->count; i++) { file_size = file_size + sizeof(int32_t); // To store label size file_size = file_size + strlen(((tax->ranks)->label)[i]); // To store label } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank count if (write(file_descriptor, &((tax->ranks)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write ranks for (i=0; i < (tax->ranks)->count; i++) { length = strlen(((tax->ranks)->label)[i]); // Write rank size if (write(file_descriptor, &length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank label if (write(file_descriptor, ((tax->ranks)->label)[i], length) < ((ssize_t) length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBIDMS_UNKNOWN_ERROR); obidebug(1, "\nError closing a DMS information file"); return -1; } return 0; } int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=0; i < (tax->taxa)->count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->taxa->taxon[i].name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &(tax->taxa->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write records for (i=0; i < tax->taxa->count; i++) { name_length = strlen(tax->taxa->taxon[i].name); record_size = 4*sizeof(int32_t) + name_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank index if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write parent index if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBIDMS_UNKNOWN_ERROR); obidebug(1, "\nError closing a DMS information file"); return -1; } return 0; } int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t class_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=0; i < (tax->names)->count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->names->names[i].name); // To store name file_size = file_size + strlen(tax->names->names[i].class_name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &(tax->names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write records for (i=0; i < tax->names->count; i++) { name_length = strlen(tax->names->names[i].name); class_length = strlen(tax->names->names[i].class_name); record_size = 4*sizeof(int32_t) + name_length + class_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write if the name is a scientific name if (write(file_descriptor, &(tax->names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write class length if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid index if (write(file_descriptor, &(tax->names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->names->names[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write class if (write(file_descriptor, tax->names->names[i].class_name, class_length) < ((ssize_t) class_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBIDMS_UNKNOWN_ERROR); obidebug(1, "\nError closing a DMS information file"); return -1; } return 0; }