/******************************************************************** * OBIDMS taxonomy functions * ********************************************************************/ /** * @file obidms_taxonomy.c * @author Celine Mercier (celine.mercier@metabarcoding.org) * @date March 2nd 2016 * @brief Functions for reading binary taxonomy files. */ #include #include #include #include #include #include #include #include #include "obidms_taxonomy.h" #include "obidms.h" #include "obidebug.h" #include "obierrno.h" #include "utils.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) int cmp_rank_labels(const void* label1, const void* label2) { return strcmp((const char*)label1,*(const char**)label2); } static int cmp_taxids_in_ecotx_t(const void* ptaxid, const void* ptaxon) { ecotx_t* current_taxon = (ecotx_t*) ptaxon; int32_t taxid = (int32_t) ((size_t) ptaxid); return taxid - current_taxon->taxid; } static int cmp_taxids_in_ecomerged_t(const void* ptaxid, const void* ptaxon) { ecomerged_t* current_taxon = (ecomerged_t*) ptaxon; int32_t taxid = (int32_t) ((size_t) ptaxid); return taxid - current_taxon->taxid; } static int cmp_str(const void* s1, const void* s2) { return strcmp(*((char**)s1), *((char**)s2)); } static int cmp_names(const void* n1, const void* n2) { econame_t name1 = *((econame_t*)n1); econame_t name2 = *((econame_t*)n2); return strcmp(name1.name, name2.name); } char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name) { char* all_tax_dir_path; char* tax_path; all_tax_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME); if (all_tax_dir_path == NULL) return NULL; tax_path = (char*) malloc((strlen(all_tax_dir_path) + strlen(tax_name) + 2)*sizeof(char)); if (tax_path == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxonomy path"); free(all_tax_dir_path); return NULL; } if (sprintf(tax_path, "%s/%s", all_tax_dir_path, tax_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building taxonomy path"); free(all_tax_dir_path); return NULL; } free(all_tax_dir_path); return tax_path; } int32_t rank_index(const char* label, ecorankidx_t* ranks) { char **rep; rep = bsearch(label, ranks->label, ranks->count, sizeof(char*), cmp_rank_labels); if (rep) return rep-ranks->label; return -1; } void* read_ecorecord(FILE* f, int32_t* record_size) { static void* buffer = NULL; int32_t buffer_size = 0; int32_t read; if (!record_size) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: record_size can not be NULL"); return NULL; } read = fread(record_size, sizeof(int32_t), 1, f); if (feof(f)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: reached end of file"); return NULL; } if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: error reading record size"); return NULL; } if (buffer_size < *record_size) { if (buffer) buffer = realloc(buffer, *record_size); else buffer = malloc(*record_size); if (buffer == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading a taxonomy file: error allocating memory"); return NULL; } } read = fread(buffer, *record_size, 1, f); if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: error reading a record %d, %d", read, *record_size); free(buffer); return NULL; } return buffer; }; ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon) { ecotxformat_t* raw; int32_t record_length; raw = read_ecorecord(f, &record_length); if (!raw) return NULL; taxon->parent = (ecotx_t*) ((size_t) raw->parent); taxon->taxid = raw->taxid; taxon->rank = raw->rank; taxon->farest = -1; taxon->name = malloc((raw->name_length+1) * sizeof(char)); if (taxon->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading a taxonomy file: error allocating memory"); return NULL; } strncpy(taxon->name, raw->name, raw->name_length); taxon->name[raw->name_length] = 0; // TODO note: this line is probably missing in ROBITaxonomy and source of a bug return taxon; } FILE* open_ecorecorddb(const char* file_name, int32_t* count, int32_t abort_on_open_error) { FILE* f; int32_t read; f = fopen(file_name, "rb"); if (!f) { if (abort_on_open_error) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nCouldn't open a taxonomy file"); fclose(f); return NULL; } else { *count = 0; fclose(f); return NULL; } } read = fread(count, sizeof(int32_t), 1, f); if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy record size"); fclose(f); return NULL; } return f; } ecorankidx_t* read_rankidx(const char* ranks_file_name) { int32_t count; FILE* ranks_file; ecorankidx_t* ranks_index; int32_t i; int32_t rank_length; char* buffer; ranks_file = open_ecorecorddb(ranks_file_name, &count, 0); if (ranks_file==NULL) return NULL; ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * (count-1)); if (ranks_index == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxonomy rank structure"); fclose(ranks_file); return NULL; } ranks_index->count = count; for (i=0; i < count; i++) { buffer = read_ecorecord(ranks_file, &rank_length); if (buffer == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a value in a taxonomy file"); fclose(ranks_file); free(ranks_index); return NULL; } ranks_index->label[i] = (char*) malloc(rank_length+1); if (ranks_index->label[i] == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxonomy rank label"); fclose(ranks_file); free(ranks_index); free(buffer); return NULL; } strncpy(ranks_index->label[i], buffer, rank_length); (ranks_index->label[i])[rank_length] = 0; } fclose(ranks_file); return ranks_index; } ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name) { int32_t count_taxa; int32_t count_local_taxa; FILE* f_taxa; FILE* f_local_taxa; ecotxidx_t* taxa_index; struct ecotxnode* t; int32_t i; int32_t j; f_taxa = open_ecorecorddb(taxa_file_name, &count_taxa, 1); if (f_taxa == NULL) { obidebug(1, "\nError reading taxonomy taxa file"); return NULL; } f_local_taxa = open_ecorecorddb(local_taxa_file_name, &count_local_taxa, 0); taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa - 1)); if (taxa_index == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxonomy structure"); fclose(f_taxa); fclose(f_local_taxa); return NULL; } taxa_index->count = count_taxa + count_local_taxa; taxa_index->ncbi_count = count_taxa; taxa_index->local_count = count_local_taxa; taxa_index->buffer_size = taxa_index->count; taxa_index->max_taxid = 0; printf("Reading %d taxa...\n", count_taxa); for (i=0; itaxon[i])); taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent->farest = 0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) taxa_index->max_taxid = taxa_index->taxon[i].taxid; } if (count_local_taxa > 0) printf("Reading %d local taxa...\n", count_local_taxa); else printf("No local taxa\n"); count_taxa = taxa_index->count; for (; i < count_taxa; i++){ readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i])); taxa_index->taxon[i].idx = i; taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent->farest=0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) taxa_index->max_taxid = taxa_index->taxon[i].taxid; } for (i=0; i < count_taxa; i++) { t = taxa_index->taxon+i; if (t->farest == -1) { t->farest=0; while (t->parent != t) { j = t->farest + 1; if (j > t->parent->farest) { t->parent->farest = j; t=t->parent; } else t = taxa_index->taxon; } } } fclose(f_taxa); if (f_local_taxa != NULL) fclose(f_local_taxa); return taxa_index; } econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy) { econameformat_t* raw; int32_t record_length; raw = read_ecorecord(f, &record_length); if (raw == NULL) return NULL; name->is_scientific_name = raw->is_scientific_name; name->name = malloc((raw->name_length + 1) * sizeof(char)); if (name->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon name"); free(raw); return NULL; } strncpy(name->name, raw->names, raw->name_length); name->name[raw->name_length] = 0; name->class_name = malloc((raw->class_length+1) * sizeof(char)); if (name->class_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon class name"); free(name->name); free(raw); return NULL; } strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length); name->class_name[raw->class_length] = 0; name->taxon = taxonomy->taxa->taxon + raw->taxid; return name; } econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy) { int32_t count; FILE* f; econameidx_t* index_names; int32_t i; f = open_ecorecorddb(file_name, &count, 0); if (f == NULL) { obidebug(1, "\nError reading taxonomy name file"); return NULL; } index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * (count-1)); if (index_names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading taxonomy name file"); return NULL; } index_names->count = count; for (i=0; i < count; i++) { readnext_econame(f, (index_names->names)+i, taxonomy); if ((index_names->names)+i == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy name file"); free(index_names); return NULL; } } fclose(f); return index_names; } ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy) { int32_t count; FILE* f; ecomergedidx_t* index_merged_idx; ecomerged_t* merged_idx; int32_t i; int32_t record_length; f = open_ecorecorddb(file_name, &count, 0); if (f == NULL) { obidebug(1, "\nError reading taxonomy name file"); return NULL; } index_merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + (sizeof(ecomerged_t) * count)); if (index_merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading taxonomy name file"); return NULL; } index_merged_idx->count = count; for (i=0; i < count; i++) { merged_idx = read_ecorecord(f, &record_length); memcpy((index_merged_idx->merged)+i, merged_idx, record_length); if ((index_merged_idx->merged)+i == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy name file"); free(index_merged_idx); return NULL; } } fclose(f); return index_merged_idx; } // Functions to write taxonomy structure to binary files int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t length; // Compute file size file_size = sizeof(int32_t); for (i=0; i < (tax->ranks)->count; i++) { file_size = file_size + sizeof(int32_t); // To store label size file_size = file_size + strlen(((tax->ranks)->label)[i]); // To store label } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file %s", file_name); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank count if (write(file_descriptor, &((tax->ranks)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write ranks for (i=0; i < (tax->ranks)->count; i++) { length = strlen(((tax->ranks)->label)[i]); // Write rank size if (write(file_descriptor, &length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank label if (write(file_descriptor, ((tax->ranks)->label)[i], length) < ((ssize_t) length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBIDMS_UNKNOWN_ERROR); obidebug(1, "\nError closing a DMS information file"); return -1; } return 0; } int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=0; i < (tax->taxa)->ncbi_count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->taxa->taxon[i].name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &(tax->taxa->ncbi_count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write records for (i=0; i < (tax->taxa)->ncbi_count; i++) { name_length = strlen(tax->taxa->taxon[i].name); record_size = 4*sizeof(int32_t) + name_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank index if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write parent index if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBIDMS_UNKNOWN_ERROR); obidebug(1, "\nError closing a DMS information file"); return -1; } return 0; } int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=(tax->taxa)->ncbi_count; i < (tax->taxa)->count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->taxa->taxon[i].name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.ldx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &((tax->taxa)->local_count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write records for (i=(tax->taxa)->ncbi_count; i < (tax->taxa)->count; i++) { name_length = strlen(tax->taxa->taxon[i].name); record_size = 4*sizeof(int32_t) + name_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank index if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write parent index if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBIDMS_UNKNOWN_ERROR); obidebug(1, "\nError closing a DMS information file"); return -1; } return 0; } int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t class_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=0; i < (tax->names)->count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->names->names[i].name); // To store name file_size = file_size + strlen(tax->names->names[i].class_name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &(tax->names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write records for (i=0; i < tax->names->count; i++) { name_length = strlen(tax->names->names[i].name); class_length = strlen(tax->names->names[i].class_name); record_size = 4*sizeof(int32_t) + name_length + class_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write if the name is a scientific name if (write(file_descriptor, &(tax->names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write class length if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid index if (write(file_descriptor, &(tax->names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->names->names[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write class if (write(file_descriptor, tax->names->names[i].class_name, class_length) < ((ssize_t) class_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a DMS information file"); return -1; } return 0; } int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t record_size; // Compute file size file_size = sizeof(int32_t) + (sizeof(int32_t) * 3 * (tax->merged_idx)->count); // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.adx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file %s", file_name); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write merged indices count if (write(file_descriptor, &((tax->merged_idx)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } record_size = 2 * sizeof(int32_t); // Write merged indices for (i=0; i < (tax->merged_idx)->count; i++) { // Write record size if (write(file_descriptor, &(record_size), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write index corresponding to the taxid in the ecotxidx_t structure if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxonomy file file"); return -1; } return 0; } int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name) { char* taxonomy_path; // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, tax_name); if (taxonomy_path == NULL) return -1; // Try to create the directory if (mkdir(taxonomy_path, 00777) < 0) { if (errno == EEXIST) obidebug(1, "\nA taxonomy already exists with this name."); obidebug(1, "\nProblem creating a new taxonomy directory"); free(taxonomy_path); return -1; } free(taxonomy_path); if (write_rankidx(dms, tax, tax_name) < 0) return -1; if (write_taxonomyidx(dms, tax, tax_name) < 0) return -1; if (write_nameidx(dms, tax, tax_name) < 0) return -1; if (write_mergedidx(dms, tax, tax_name) < 0) return -1; // Check if there are local taxa (if so last taxon is local) if ((tax->taxa)->local_count > 0) if (write_local_taxonomy_idx(dms, tax, tax_name) < 0) return -1; return 0; } OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump) { OBIDMS_taxonomy_p tax; struct dirent* dp; DIR* tax_dir; FILE* file; bool nodes_found=false; bool names_found=false; bool merged_found=false; bool delnodes_found=false; int32_t* delnodes=NULL; int32_t delnodes_count; char line[2048]; // TODO large enough? char* elt; char* file_name; int buffer_size; int i, j; int n, nD, nT; char** rank_names; int* parent_taxids; int taxid, old_taxid; bool already_in; ecotx_t* t; // Initialize taxonomy structure tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t)); if (tax == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); return NULL; } tax->ranks = NULL; tax->taxa = NULL; tax->names = NULL; tax->merged_idx = NULL; tax->dms = NULL; (tax->tax_name)[0] = '\0'; // TODO check if taxdump path is for a gz file to unzip or a directory tax_dir = opendir(taxdump); if (tax_dir == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxdump directory"); free(tax); return NULL; } // Go through taxonomy files while ((dp = readdir(tax_dir)) != NULL) { if (strcmp(dp->d_name, "nodes.dmp") == 0) { nodes_found = true; buffer_size = 10000; // Initializing the taxa structure tax->taxa = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * buffer_size); if (tax->taxa == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); free(tax); closedir(tax_dir); return NULL; } // Initialize rank names and parent taxids arrays parent_taxids = malloc(buffer_size * sizeof(int)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); obi_close_taxonomy(tax); closedir(tax_dir); return NULL; } rank_names = malloc(buffer_size * sizeof(char*)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); obi_close_taxonomy(tax); free(parent_taxids); closedir(tax_dir); return NULL; } // Allocating the memory for the file name file_name = (char*) malloc((strlen(taxdump) + 10)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); closedir(tax_dir); return NULL; } // Build the file path if (sprintf(file_name, "%s/nodes.dmp", taxdump) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a taxonomy file name"); obi_close_taxonomy(tax); closedir(tax_dir); free(parent_taxids); free(rank_names); free(file_name); return NULL; } file = fopen(file_name, "r"); if (file == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxonomy file"); obi_close_taxonomy(tax); closedir(tax_dir); free(parent_taxids); free(rank_names); free(file_name); return NULL; } free(file_name); (tax->taxa)->max_taxid = 0; n = 0; while (fgets(line, sizeof(line), file)) { // Enlarge structures if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * buffer_size); if (tax->taxa == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } parent_taxids = (int*) realloc(parent_taxids, sizeof(int) * buffer_size); if (parent_taxids == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } rank_names = (char**) realloc(rank_names, sizeof(char*) * buffer_size); if (rank_names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } } // Check for terminal '\n' character (line complete) if (line[strlen(line) - 1] != '\n') { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } (tax->taxa)->taxon[n].idx = n; // Parse 3 first elements separated by '|' elt = strtok(line, "|"); // Remove the last character (tab character) elt[strlen(elt)-1] = '\0'; // First element: taxid (tax->taxa)->taxon[n].taxid = atoi(elt); // Update max taxid if ((tax->taxa)->taxon[n].taxid > (tax->taxa)->max_taxid) (tax->taxa)->max_taxid = (tax->taxa)->taxon[n].taxid; // Initialize farest taxid value (tax->taxa)->taxon[n].farest = -1; i = 1; while (i < 3) { elt = strtok(NULL, "|"); // Remove the first and the last characters (tab characters) elt = elt+1; elt[strlen(elt)-1] = '\0'; if (i == 1) parent_taxids[n] = atoi(elt); else if (i == 2) { rank_names[n] = (char*) malloc((strlen(elt)+1) * sizeof(char)); if (rank_names[n] == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxon rank name"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } strcpy(rank_names[n], elt); } i++; } n++; } // Check that fgets stopped because it reached EOF if (!feof(file)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: file reading was stopped before end of file"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } // Store count (tax->taxa)->count = n; (tax->taxa)->ncbi_count = n; (tax->taxa)->local_count = 0; // Truncate the structure memory to the right size tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (tax->taxa)->count); if (tax->taxa == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } fclose(file); } } closedir(tax_dir); // Go through directory again for next file // TODO make separate functions? tax_dir = opendir(taxdump); if (tax_dir == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxdump directory"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } // Go through taxonomy files while ((dp = readdir(tax_dir)) != NULL) { if (strcmp(dp->d_name, "delnodes.dmp") == 0) { delnodes_found = true; buffer_size = 10000; // Initializing the list of deleted nodes delnodes = (int32_t*) malloc(sizeof(int32_t) * buffer_size); if (delnodes == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); closedir(tax_dir); return NULL; } // Allocating the memory for the file name file_name = (char*) malloc((strlen(taxdump) + 12)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); free(delnodes); closedir(tax_dir); return NULL; } // Build the file path if (sprintf(file_name, "%s/delnodes.dmp", taxdump) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a taxonomy file name"); obi_close_taxonomy(tax); closedir(tax_dir); free(parent_taxids); free(rank_names); free(file_name); free(delnodes); return NULL; } file = fopen(file_name, "r"); if (file == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxonomy file"); obi_close_taxonomy(tax); closedir(tax_dir); free(parent_taxids); free(rank_names); free(file_name); free(delnodes); return NULL; } free(file_name); n = 0; while (fgets(line, sizeof(line), file)) { // Check for terminal '\n' character (line complete) if (line[strlen(line) - 1] != '\n') { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); free(delnodes); return NULL; } // Get first and only element of the line (the deprecated taxid) elt = strtok(line, "|"); // Remove the last character (tab character) elt[strlen(elt)-1] = '\0'; // First element: old deprecated taxid old_taxid = atoi(elt); // Store the old taxid in the list of deleted taxids // Enlarge array if needed if (n == buffer_size) { buffer_size = buffer_size * 2; delnodes = (int32_t*) realloc(tax->merged_idx, sizeof(int32_t) * buffer_size); if (delnodes == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } } delnodes[n] = old_taxid; n++; } // Check that fgets stopped because it reached EOF if (!feof(file)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: file reading was stopped before end of file"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); free(delnodes); return NULL; } // Store count delnodes_count = n; fclose(file); } } closedir(tax_dir); // Go through directory again for next file // TODO make separate functions? tax_dir = opendir(taxdump); if (tax_dir == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxdump directory"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); free(delnodes); return NULL; } // Go through taxonomy files while ((dp = readdir(tax_dir)) != NULL) { if (strcmp(dp->d_name, "merged.dmp") == 0) { merged_found = true; buffer_size = 10000; // Initializing the merged structure tax->merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); free(delnodes); closedir(tax_dir); return NULL; } // Allocating the memory for the file name file_name = (char*) malloc((strlen(taxdump) + 12)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); free(delnodes); closedir(tax_dir); return NULL; } // Build the file path if (sprintf(file_name, "%s/merged.dmp", taxdump) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a taxonomy file name"); obi_close_taxonomy(tax); closedir(tax_dir); free(parent_taxids); free(rank_names); free(file_name); free(delnodes); return NULL; } file = fopen(file_name, "r"); if (file == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxonomy file"); obi_close_taxonomy(tax); closedir(tax_dir); free(parent_taxids); free(rank_names); free(file_name); free(delnodes); return NULL; } free(file_name); nT = 0; // to point in current taxa list while merging nD = delnodes_count-1; // to point in deleted taxids list while merging (going from count-1 to 0 because taxids are sorted in descending order) n = 0; // to point in final merged list while merging while (fgets(line, sizeof(line), file)) { // Check for terminal '\n' character (line complete) if (line[strlen(line) - 1] != '\n') { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); free(delnodes); return NULL; } // Parse the 2 elements separated by '|' // Get first element elt = strtok(line, "|"); // Remove the last character (tab character) elt[strlen(elt)-1] = '\0'; // First element: old deprecated taxid old_taxid = atoi(elt); // Get 2nd element: new taxid elt = strtok(NULL, "|"); // Remove the first and the last characters (tab characters) elt = elt+1; elt[strlen(elt)-1] = '\0'; taxid = atoi(elt); // Store the old taxid in the merged_idx ordered taxid list // The merged list is an ordered list of the current taxids, the deprecated taxids that have current references, // and the deleted taxids with no current reference. An element of the list is composed of the taxid, and the index // of the taxon in the taxa structure, or -1 for deleted taxids. // Creating the merged list requires to merge the 3 ordered lists into one. while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) && ((nD >= 0) && (delnodes[nD] < old_taxid))) { if ((tax->taxa)->taxon[nT].taxid < delnodes[nD]) { // Add element from taxa list // Enlarge structure if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); free(delnodes); return NULL; } } (tax->merged_idx)->merged[n].taxid = (tax->taxa)->taxon[nT].taxid; (tax->merged_idx)->merged[n].idx = nT; nT++; n++; } else if (delnodes[nD] < (tax->taxa)->taxon[nT].taxid) { // Add element from deleted taxids list // Enlarge structure if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); free(delnodes); return NULL; } } (tax->merged_idx)->merged[n].taxid = delnodes[nD]; (tax->merged_idx)->merged[n].idx = -1; // The index to tag deleted taxids is -1 nD--; n++; } } // Add the deprecated taxid // Enlarge structure if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); free(delnodes); return NULL; } } // Store the deprecated taxid with the index that refers to the new taxid // Find the index of the new taxid t = obi_taxo_get_taxon_with_current_taxid(tax, taxid); // Store the old taxid with the index (tax->merged_idx)->merged[n].taxid = old_taxid; (tax->merged_idx)->merged[n].idx = t->idx; n++; } // Check that fgets stopped because it reached EOF if (!feof(file)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: file reading was stopped before end of file"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } // Store count (tax->merged_idx)->count = n; // Truncate the structure memory to the right size tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * (tax->merged_idx)->count); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } fclose(file); } } // Free delnodes array, not needed anymore free(delnodes); closedir(tax_dir); // Go through directory again for next file tax_dir = opendir(taxdump); if (tax_dir == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxdump directory"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } // Go through taxonomy files while ((dp = readdir(tax_dir)) != NULL) { if (strcmp(dp->d_name, "names.dmp") == 0) { names_found = true; buffer_size = 10000; // Initializing the names structure tax->names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * buffer_size); if (tax->names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); closedir(tax_dir); return NULL; } // Allocating the memory for the file name file_name = (char*) malloc((strlen(taxdump) + 11)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); closedir(tax_dir); return NULL; } // Build the file path if (sprintf(file_name, "%s/names.dmp", taxdump) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a taxonomy file name"); obi_close_taxonomy(tax); closedir(tax_dir); free(parent_taxids); free(rank_names); free(file_name); return NULL; } file = fopen(file_name, "r"); if (file == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxonomy file"); obi_close_taxonomy(tax); closedir(tax_dir); free(parent_taxids); free(rank_names); free(file_name); return NULL; } free(file_name); n = 0; j = 0; while (fgets(line, sizeof(line), file)) { // Enlarge structures if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * buffer_size); if (tax->names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } } // Check for terminal '\n' character (line complete) if (line[strlen(line) - 1] != '\n') { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } // Parse 4 first elements separated by '|' elt = strtok(line, "|"); // Remove the last character (tab character) elt[strlen(elt)-1] = '\0'; // First element: taxid taxid = atoi(elt); // Find taxid in taxa structure and store pointer in names structure i = j; while ((i < (tax->taxa)->count) && ((tax->taxa)->taxon[i].taxid != taxid)) i++; if (i == (tax->taxa)->count) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: could not find taxon associated to name when reading taxdump"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } j = i; // Because there are several names by taxon but they are in the same order (tax->names)->names[n].taxon = ((tax->taxa)->taxon)+i; i = 1; while (i < 4) { elt = strtok(NULL, "|"); // Remove the first and the last characters (tab characters) elt = elt+1; elt[strlen(elt)-1] = '\0'; if (i == 1) // Name { (tax->names)->names[n].name = (char*) malloc((strlen(elt) + 1) * sizeof(char)); if ((tax->names)->names[n].name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon name"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } strcpy((tax->names)->names[n].name, elt); } else if (i == 3) // Class name { (tax->names)->names[n].class_name = (char*) malloc((strlen(elt) + 1) * sizeof(char)); if ((tax->names)->names[n].class_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon class name"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } strcpy((tax->names)->names[n].class_name, elt); if (strcmp(elt, "scientific name") == 0) { (tax->names)->names[n].is_scientific_name = 1; } else (tax->names)->names[n].is_scientific_name = 0; } i++; } n++; } // Check that fgets stopped because it reached EOF if (!feof(file)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: file reading was stopped before end of file"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } // Store count (tax->names)->count = n; // Truncate the structure memory to the right size tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * (tax->names)->count); if (tax->names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a a taxonomy structure"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); free(parent_taxids); free(rank_names); return NULL; } fclose(file); } } closedir(tax_dir); if (!nodes_found) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem reading taxdump: nodes.dmp file not found"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } if (!names_found) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem reading taxdump: names.dmp file not found"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } // Go through data to fill the taxonomy structure // Build rank list // Initialize rank structure buffer_size = 10; tax->ranks = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * buffer_size); if (tax->ranks == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxon rank array"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } (tax->ranks)->count = 0; for (i=0; i < (tax->taxa)->count; i++) { already_in = false; for (j=0; j < (tax->ranks)->count; j++) { if (strcmp(rank_names[i], ((tax->ranks)->label)[j]) == 0) { already_in = true; break; } } if (!already_in) { // Realloc rank structure if needed if ((tax->ranks)->count == buffer_size) { buffer_size = buffer_size + 10; tax->ranks = (ecorankidx_t*) realloc(tax->ranks, sizeof(ecorankidx_t) + sizeof(char*) * buffer_size); if (tax->ranks == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for taxon ranks"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } } // Store new rank ((tax->ranks)->label)[(tax->ranks)->count] = (char*) malloc((strlen(rank_names[i]) + 1) * sizeof(char)); if (((tax->ranks)->label)[(tax->ranks)->count] == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxon rank names"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } strcpy(((tax->ranks)->label)[(tax->ranks)->count], rank_names[i]); ((tax->ranks)->count)++; } } // Truncate to the number of ranks recorded tax->ranks = (ecorankidx_t*) realloc(tax->ranks, sizeof(ecorankidx_t) + sizeof(char*) * (tax->ranks)->count); if (tax->ranks == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for taxon ranks"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } // Sort in alphabetical order qsort((tax->ranks)->label, (tax->ranks)->count, sizeof(char*), cmp_str); // Associate the taxa with their rank indices for (i=0; i < (tax->taxa)->count; i++) { for (j=0; j < (tax->ranks)->count; j++) { if (strcmp(rank_names[i], ((tax->ranks)->label)[j]) == 0) { ((tax->taxa)->taxon)[i].rank = j; break; } } } // Associate the taxa with their scientific name for (i=0; i < (tax->names)->count; i++) { if ((tax->names)->names[i].is_scientific_name) { ((tax->names)->names[i].taxon)->name = (char*) malloc((strlen((((tax->names)->names)[i]).name) + 1) * sizeof(char)); if (((tax->names)->names[i].taxon)->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for taxon ranks"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } strcpy(((tax->names)->names[i].taxon)->name, (((tax->names)->names)[i]).name); } } // Sort names in alphabetical order qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names); // Associate the taxa with their parent for (i=0; i < (tax->taxa)->count; i++) { ((tax->taxa)->taxon)[i].parent = obi_taxo_get_taxon_with_current_taxid(tax, parent_taxids[i]); if (((tax->taxa)->taxon)[i].parent == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: taxon parent not found"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } (((tax->taxa)->taxon)[i].parent)->farest = 0; } (tax->taxa)->buffer_size = (tax->taxa)->count; // Compute longest branches for (i=0; i < (tax->taxa)->count; i++) { t = (((tax->taxa))->taxon)+i; if (t->farest == -1) { t->farest=0; while (t->parent != t) { j = t->farest + 1; if (j > t->parent->farest) { t->parent->farest = j; t=t->parent; } else t = (tax->taxa)->taxon; } } } // Freeing free(parent_taxids); for (i=0; i < (tax->taxa)->count; i++) free(rank_names[i]); free(rank_names); return tax; } int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid) { int32_t taxid; ecotx_t* taxon; econame_t* name_struct; int i; // Enlarge the structure memory for a new taxon tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (((tax->taxa)->count) + 1)); if (tax->taxa == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon"); return -1; } // Compute new taxid that must be equal or greater than 1E7 and greater than the maximum taxid existing in the taxonomy if (min_taxid < MIN_LOCAL_TAXID) min_taxid = MIN_LOCAL_TAXID; if (min_taxid > (tax->taxa)->max_taxid) taxid = min_taxid; else taxid = ((tax->taxa)->max_taxid) + 1; // Fill the ecotx_t node structure taxon = ((tax->taxa)->taxon)+((tax->taxa)->count); taxon->taxid = taxid; taxon->idx = (tax->taxa)->count; taxon->local = true; taxon->name = (char*) malloc((strlen(name) + 1) * sizeof(char)); if (taxon->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon name to add a new taxon"); return -1; } strcpy(taxon->name, name); taxon->rank = -1; for (i=0; i < (tax->ranks)->count; i++) { if (strcmp(rank_name, ((tax->ranks)->label)[i]) == 0) { taxon->rank = i; break; } } if (taxon->rank == -1) // TODO Discuss possibility of creating rank if doesn't exist { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: taxon rank not found when adding a new taxon"); return -1; } taxon->parent = obi_taxo_get_taxon_with_taxid(tax, parent_taxid); if (taxon->parent == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: taxon parent not found when adding a new taxon"); return -1; } taxon->farest = 0; // TODO not sure // Update taxonomy counts etc (tax->taxa)->max_taxid = taxid; ((tax->taxa)->count)++; ((tax->taxa)->local_count)++; (tax->taxa)->buffer_size = (tax->taxa)->count; // Add new name in names structure // TODO discuss because in OBITools1 the new names were not written in .ndx // Allocate memory for new name tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1)); if (tax->names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon"); return -1; } // Add new name name_struct = (tax->names)->names + ((tax->names)->count); name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char)); if (name_struct->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon name to add a new taxon"); return -1; } strcpy(name_struct->name, name); name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char)); if (name_struct->class_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon"); return -1; } strcpy(name_struct->class_name, "scientific name"); name_struct->is_scientific_name = true; name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1; // Sort names in alphabetical order qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names); // Update name count ((tax->names)->count)++; return taxid; } /////// PUBLIC ///////// OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names) { OBIDMS_taxonomy_p tax; char* taxonomy_path; char* ranks_file_name; char* taxa_file_name; char* merged_idx_file_name; char* local_taxa_file_name; char* alter_names_file_name; int buffer_size; tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t)); if (tax == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxonomy structure"); return NULL; } tax->ranks = NULL; tax->taxa = NULL; tax->names = NULL; tax->merged_idx = NULL; tax->dms = dms; strcpy(tax->tax_name, taxonomy_name); buffer_size = 2048; taxonomy_path = get_taxonomy_path(dms, taxonomy_name); if (taxonomy_path == NULL) return NULL; // Read ranks ranks_file_name = (char*) malloc(buffer_size*sizeof(char)); if (ranks_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for ranks file name"); free(taxonomy_path); free(tax); return NULL; } if (snprintf(ranks_file_name, buffer_size, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building ranks file name"); free(taxonomy_path); free(ranks_file_name); free(tax); return NULL; } tax->ranks = read_rankidx(ranks_file_name); if (tax->ranks == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building ranks file name"); free(taxonomy_path); free(ranks_file_name); free(tax); return NULL; } free(ranks_file_name); // Read taxa taxa_file_name = (char*) malloc(buffer_size*sizeof(char)); if (taxa_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxa file name"); free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(taxa_file_name, buffer_size, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building taxa file name"); free(taxonomy_path); free(taxa_file_name); obi_close_taxonomy(tax); return NULL; } local_taxa_file_name = (char*) malloc(buffer_size*sizeof(char)); if (local_taxa_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxa file name"); free(taxonomy_path); free(taxa_file_name); obi_close_taxonomy(tax); return NULL; } if (snprintf(local_taxa_file_name, buffer_size, "%s/%s.ldx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building local taxa file name"); free(taxonomy_path); free(taxa_file_name); free(local_taxa_file_name); obi_close_taxonomy(tax); return NULL; } tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name); if (tax->taxa == NULL) { free(taxonomy_path); free(taxa_file_name); free(local_taxa_file_name); obi_close_taxonomy(tax); return NULL; } free(taxa_file_name); free(local_taxa_file_name); // Read merged index (old and current taxids referring to indices in the taxa structure) merged_idx_file_name = (char*) malloc(buffer_size*sizeof(char)); if (merged_idx_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for merged index file name"); free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(merged_idx_file_name, buffer_size, "%s/%s.adx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building merged index file name"); free(taxonomy_path); free(merged_idx_file_name); obi_close_taxonomy(tax); return NULL; } tax->merged_idx = read_mergedidx(merged_idx_file_name, tax); if (tax->merged_idx == NULL) { free(taxonomy_path); free(merged_idx_file_name); obi_close_taxonomy(tax); return NULL; } free(merged_idx_file_name); // Read alternative names if (read_alternative_names) { alter_names_file_name = (char*) malloc(buffer_size*sizeof(char)); if (alter_names_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for alternative names file name"); free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(alter_names_file_name, buffer_size, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building alternative names file name"); free(taxonomy_path); free(alter_names_file_name); obi_close_taxonomy(tax); return NULL; } tax->names = read_nameidx(alter_names_file_name, tax); if (tax->names == NULL) { free(taxonomy_path); free(alter_names_file_name); obi_close_taxonomy(tax); return NULL; } free(alter_names_file_name); } free(taxonomy_path); return tax; } int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy) { int i; // Update local informations (local taxa and preferred names) if there are any if ((taxonomy->taxa)->local_count > 0) { if (taxonomy->dms == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxonomy with local files but no DMS associated (probably read directly from taxdump)"); // TODO discuss } if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0) return -1; } if (taxonomy) { if (taxonomy->ranks) { for (i=0; i < (taxonomy->ranks)->count; i++) { if ((taxonomy->ranks)->label[i]) free((taxonomy->ranks)->label[i]); } free(taxonomy->ranks); } if (taxonomy->names) { for (i=0; i < (taxonomy->names)->count; i++) { if (((taxonomy->names)->names[i]).name) free(((taxonomy->names)->names[i]).name); if (((taxonomy->names)->names[i]).class_name) free(((taxonomy->names)->names[i]).class_name); } free(taxonomy->names); } if (taxonomy->taxa) { for (i=0; i < (taxonomy->taxa)->count; i++) { if (((taxonomy->taxa)->taxon[i]).name) free(((taxonomy->taxa)->taxon[i]).name); } free(taxonomy->taxa); } if (taxonomy->merged_idx) { free(taxonomy->merged_idx); } free(taxonomy); } return 0; } ////////////////////////////////////////////////////////////////////////// ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx) { ecotx_t* current_taxon; ecotx_t* next_taxon; current_taxon = taxon; next_taxon = current_taxon->parent; while ((current_taxon != next_taxon) && // root node (current_taxon->rank != rankidx)) { current_taxon = next_taxon; next_taxon = current_taxon->parent; } if (current_taxon->rank == rankidx) return current_taxon; else return NULL; } ecotx_t* obi_taxo_get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid) // TODO discuss keeping private? { ecotx_t *current_taxon; int32_t count; count = (taxonomy->taxa)->count; current_taxon = (ecotx_t*) bsearch((const void *) ((size_t) taxid), (const void *) taxonomy->taxa->taxon, count, sizeof(ecotx_t), cmp_taxids_in_ecotx_t); return current_taxon; } ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid) { ecotx_t *current_taxon; ecomerged_t *indexed_taxon; int32_t count; count = (taxonomy->merged_idx)->count; indexed_taxon = (ecomerged_t*) bsearch((const void *) ((size_t) taxid), (const void *) taxonomy->merged_idx->merged, count, sizeof(ecomerged_t), cmp_taxids_in_ecomerged_t); if (indexed_taxon == NULL) current_taxon = NULL; else if (indexed_taxon->idx == -1) current_taxon = NULL; // TODO discuss what to do when old deleted taxon else current_taxon = (taxonomy->taxa->taxon)+(indexed_taxon->idx); return current_taxon; } bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid) // TODO discuss that this doesn't work with deprecated taxids { ecotx_t* next_parent; next_parent = taxon->parent; while ((other_taxid != next_parent->taxid) && (strcmp(next_parent->name, "root"))) next_parent = next_parent->parent; if (other_taxid == next_parent->taxid) return 1; else return 0; } ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("species", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the species associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("genus", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the genus associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("family", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the family associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("kingdom", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the kingdom associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static OBIDMS_taxonomy_p tax = NULL; static int32_t rankindex = -1; if (taxonomy && (tax != taxonomy)) { rankindex = rank_index("superkingdom", taxonomy->ranks); tax = taxonomy; } if (!tax || (rankindex < 0)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the superkingdom associated with a taxon: No taxonomy defined"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); }