/******************************************************************** * OBIDMS taxonomy functions * ********************************************************************/ /** * @file obidms_taxonomy.c * @author Celine Mercier (celine.mercier@metabarcoding.org) * @date March 2nd 2016 * @brief Functions for handling the reading and writing of taxonomy files. */ #include #include #include #include #include #include #include #include #include "obidms_taxonomy.h" #include "obidms.h" #include "obidebug.h" #include "obierrno.h" #include "utils.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) /************************************************************************** * * D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S * **************************************************************************/ /** * @brief Internal function comparing two rank names. * * @param label1 A char* pointer on the first rank name. * @param label2 A char** pointer on a second pointer, that second char* pointer being on the second rank name. * (making the function usable with an ecorankidx_t structure and functions like bsearch) * * @returns A value < 0 if label1 < label2, * a value > 0 if label1 > label2, * and 0 if label1 == label2. */ static int cmp_rank_labels(const void* label1, const void* label2); /** * @brief Internal function comparing two taxids, one of them stored in an ecotx_t structure. * * @param ptaxid The first taxid. * @param ptaxon A pointer on an ecotx_t structure where the second taxid is stored. * * @returns A value < 0 if taxid1 < taxid2, * a value > 0 if taxid1 > taxid2, * and 0 if taxid1 == taxid2. */ static int cmp_taxids_in_ecotx_t(const void* ptaxid, const void* ptaxon); /** * @brief Internal function comparing two taxids, one of them stored in an ecomerged_t structure. * * @param ptaxid The first taxid. * @param ptaxon A pointer on an ecomerged_t structure where the second taxid is stored. 
* * @returns A value < 0 if taxid1 < taxid2, * a value > 0 if taxid1 > taxid2, * and 0 if taxid1 == taxid2. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int cmp_taxids_in_ecomerged_t(const void* ptaxid, const void* ptaxon); /** * @brief Internal function comparing two character strings pointed to by char** pointers. * * @param s1 A char** pointer on a second pointer, that second char* pointer being on the first character string. * @param s2 A char** pointer on a second pointer, that second char* pointer being on the second character string. * * @returns A value < 0 if s1 < s2, * a value > 0 if s1 > s2, * and 0 if s1 == s2. */ static int cmp_str(const void* s1, const void* s2); /** * @brief Internal function comparing two taxon names stored in econame_t structures. * * @param n1 A pointer on the first econame_t structure. * @param n2 A pointer on the second econame_t structure. * * @returns A value < 0 if n1 < n2, * a value > 0 if n1 > n2, * and 0 if n1 == n2. * * @since 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int cmp_names(const void* n1, const void* n2); /** * @brief Internal function returning the ecotx_t structure associated with a taxid. * * This function only looks for the taxid in the modern taxonomy, it does not consider deprecated * and old taxids, unlike obi_taxo_get_taxon_with_taxid(). * * @param taxonomy A pointer on the taxonomy structure. * @param taxid The taxid of the taxon wanted. * * @returns A pointer on the ecotx_t structure associated with a taxid. * * @see obi_taxo_get_taxon_with_taxid() */ static ecotx_t* get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid); /** * @brief Internal function returning the complete path to a taxonomy directory in a DMS. * * @param dms A pointer on the DMS. * @param tax_name The name of the taxonomy. * * @returns The complete path to the taxonomy directory. * @retval NULL if an error occurred. 
* * @since 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name); /** * @brief Internal function returning the index of a rank in an ecorankidx_t structure. * * @param label The name of the rank. * @param ranks A pointer on an ecorankidx_t structure. * * @returns The index of a rank in the ecorankidx_t structure. * @retval -1 if the rank was not found. */ static int32_t rank_label_to_index(const char* label, ecorankidx_t* ranks); /** * @brief Internal function opening a binary taxonomy file (.tdx, .rdx, .ndx, .adx, .pdx, .ldx). * * @param file_name The file path. * @param count A pointer on an integer that the function will set to the number of records in the file. * @param abort_on_open_error A boolean indicating whether the function should trigger an error if the file can't be opened. * * @returns The FILE object. * @retval NULL if an error occurred or if the file was not found. */ static FILE* open_ecorecorddb(const char* file_name, int32_t* count, int32_t abort_on_open_error); /** * @brief Internal function returning the next record in a binary taxonomy file (.tdx, .rdx, .ndx, .adx, .pdx, .ldx). * * @param f The file object with the offset at the start of a record. * @param record_size A pointer on an integer that the function will set to the size of the record. * * @returns A pointer on the read record. * @retval NULL if an error occurred. */ static void* read_ecorecord(FILE* f, int32_t* record_size); /** * @brief Internal function reading the next taxon record in a .tdx binary taxonomy file. * * @param f The file object with the offset at the start of a record. * @param taxon A pointer on an empty, allocated ecotx_t structure that the function will fill. * * @returns A pointer on the read record. * @retval NULL if an error occurred. 
*/ static ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon); /** * @brief Internal function reading the next taxon name record in a .ndx binary taxonomy file. * * @param f The file object with the offset at the start of a record. * @param name A pointer on an empty, allocated econame_t structure that the function will fill. * @param taxonomy A pointer on the taxonomy structure. * * @returns A pointer on the read record. * @retval NULL if an error occurred. */ static econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy); /** * @brief Internal function reading the next taxon preferred name record in a .pdx binary taxonomy file. * * @param f The file object with the offset at the start of a record. * @param name A pointer on an empty, allocated econame_t structure that the function will fill. * @param taxonomy A pointer on the taxonomy structure. * * @returns A pointer on the read record. * @retval NULL if an error occurred. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy); /** * @brief Internal function reading a taxonomic ranks (.rdx) binary taxonomy file. * * @param ranks_file_name The name of the .rdx file to read. * * @returns A pointer on an ecorankidx_t structure. * @retval NULL if an error occurred. */ static ecorankidx_t* read_ranks_idx(const char* ranks_file_name); /** * @brief Internal function reading the taxa (.tdx, .ldx) binary taxonomy file. * * @param taxa_file_name The name of the .tdx file to read. * @param local_taxa_file_name The name of the .ldx file containing the local taxa to read if there is one. * * @returns A pointer on an ecotxidx_t structure. * @retval NULL if an error occurred. */ static ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name); /** * @brief Internal function reading a names (.ndx) binary taxonomy file. 
* * @param file_name The name of the .ndx file to read. * @param taxonomy A pointer on the taxonomy structure. * * @returns A pointer on an econameidx_t structure. * @retval NULL if an error occurred. */ static econameidx_t* read_names_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy); /** * @brief Internal function reading a preferred names (.pdx) binary taxonomy file. * * @param file_name The name of the .pdx file to read. * @param taxonomy A pointer on the taxonomy structure. * * @returns A pointer on an econameidx_t structure. * @retval NULL if an error occurred. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static econameidx_t* read_preferred_names_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy); /** * @brief Internal function reading a merged index (.adx) binary taxonomy file. * * @param file_name The name of the .adx file to read. * @param taxonomy A pointer on the taxonomy structure. * * @returns A pointer on an ecomergedidx_t structure. * @retval NULL if an error occurred. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static ecomergedidx_t* read_merged_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy); /** * @brief Internal function writing a rank index (.rdx) binary taxonomy file. * * @param dms A pointer on the DMS. * @param tax A pointer on the taxonomy structure. * @param taxonomy_name The name of the taxonomy. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. * @retval -1 if an error occurred. */ static int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); /** * @brief Internal function writing a taxonomy index (.tdx) binary taxonomy file. * * @param dms A pointer on the DMS. * @param tax A pointer on the taxonomy structure. * @param taxonomy_name The name of the taxonomy. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. 
* @retval -1 if an error occurred. */ static int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); /** * @brief Internal function writing a local taxonomy index (.ldx) binary taxonomy file. * * @param dms A pointer on the DMS. * @param tax A pointer on the taxonomy structure. * @param taxonomy_name The name of the taxonomy. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. * @retval -1 if an error occurred. * * @since 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); /** * @brief Internal function writing a names index (.ndx) binary taxonomy file. * * @param dms A pointer on the DMS. * @param tax A pointer on the taxonomy structure. * @param taxonomy_name The name of the taxonomy. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. * @retval -1 if an error occurred. */ static int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); /** * @brief Internal function writing a preferred names index (.pdx) binary taxonomy file. * * @param dms A pointer on the DMS. * @param tax A pointer on the taxonomy structure. * @param taxonomy_name The name of the taxonomy. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. * @retval -1 if an error occurred. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); /** * @brief Internal function writing a merged index (.adx) binary taxonomy file. * * @param dms A pointer on the DMS. * @param tax A pointer on the taxonomy structure. * @param taxonomy_name The name of the taxonomy. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. 
* @retval -1 if an error occurred. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name); /** * @brief Internal function reading the 'nodes.dmp' file from an NCBI taxdump. * * @param taxdump The path to the taxdump. * @param tax A pointer on the taxonomy structure. * @param rank_names_p A char*** pointer on a non allocated char* array where the function will store rank names. * @param parent_taxids_p An int** pointer on a non allocated int array where the function will store parent taxids. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. * @retval -1 if an error occurred. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int read_nodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, char*** rank_names_p, int** parent_taxids_p); /** * @brief Internal function reading the 'delnodes.dmp' file from an NCBI taxdump. * * @param taxdump The path to the taxdump. * @param tax A pointer on the taxonomy structure. * @param delnodes_p An int** pointer on a non allocated int array where the function will store deleted taxids. * @param delnodes_count An int* pointer where the function will store the number of deleted taxids. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. * @retval -1 if an error occurred. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int read_delnodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t** delnodes_p, int32_t* delnodes_count); /** * @brief Internal function reading the 'merged.dmp' file from an NCBI taxdump. * * @warning Should be used AFTER read_nodes_dmp() and read_delnodes_dmp(). 
* * The function merges the information about current nodes previously read in read_nodes_dmp(), * the information about deleted nodes previously read in read_delnodes_dmp(), and the information read * in the 'merged.dmp' file, to build the final merged taxon index in the taxonomy structure. * * @param taxdump The path to the taxdump. * @param tax A pointer on the taxonomy structure. * @param delnodes An int* pointer containing the deleted taxids. * @param delnodes_count The number of deleted taxids. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. * @retval -1 if an error occurred. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnodes, int32_t delnodes_count); /** * @brief Internal function reading the 'names.dmp' file from an NCBI taxdump. * * @param taxdump The path to the taxdump. * @param tax A pointer on the taxonomy structure. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. * @retval -1 if an error occurred. 
* * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int read_names_dmp(const char* taxdump, OBIDMS_taxonomy_p tax); /************************************************************************ * * D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S * ************************************************************************/ static int cmp_rank_labels(const void* label1, const void* label2) { return strcmp((const char*)label1,*(const char**)label2); } static int cmp_taxids_in_ecotx_t(const void* ptaxid, const void* ptaxon) { ecotx_t* current_taxon = (ecotx_t*) ptaxon; int32_t taxid = (int32_t) ((size_t) ptaxid); return taxid - current_taxon->taxid; } static int cmp_taxids_in_ecomerged_t(const void* ptaxid, const void* ptaxon) { ecomerged_t* current_taxon = (ecomerged_t*) ptaxon; int32_t taxid = (int32_t) ((size_t) ptaxid); return taxid - current_taxon->taxid; } static int cmp_str(const void* s1, const void* s2) { return strcmp(*((char**)s1), *((char**)s2)); } static int cmp_names(const void* n1, const void* n2) { econame_t name1 = *((econame_t*)n1); econame_t name2 = *((econame_t*)n2); return strcmp(name1.name, name2.name); } static ecotx_t* get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid) { ecotx_t *current_taxon; int32_t count; count = (taxonomy->taxa)->count; current_taxon = (ecotx_t*) bsearch((const void *) ((size_t) taxid), (const void *) taxonomy->taxa->taxon, count, sizeof(ecotx_t), cmp_taxids_in_ecotx_t); return current_taxon; } static char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name) { char* all_tax_dir_path; char* tax_path; all_tax_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME); if (all_tax_dir_path == NULL) return NULL; tax_path = (char*) malloc((strlen(all_tax_dir_path) + strlen(tax_name) + 2)*sizeof(char)); if (tax_path == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxonomy path"); free(all_tax_dir_path); return NULL; } if 
(sprintf(tax_path, "%s/%s", all_tax_dir_path, tax_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building taxonomy path"); free(all_tax_dir_path); return NULL; } free(all_tax_dir_path); return tax_path; } static int32_t rank_label_to_index(const char* label, ecorankidx_t* ranks) { char **rep; rep = bsearch(label, ranks->label, ranks->count, sizeof(char*), cmp_rank_labels); if (rep) return rep-ranks->label; return -1; } static FILE* open_ecorecorddb(const char* file_name, int32_t* count, int32_t abort_on_open_error) { FILE* f; int32_t read; f = fopen(file_name, "rb"); if (!f) { if (abort_on_open_error) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nCouldn't open a taxonomy file"); fclose(f); return NULL; } else { *count = 0; fclose(f); return NULL; } } read = fread(count, sizeof(int32_t), 1, f); if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy record size"); fclose(f); return NULL; } return f; } static void* read_ecorecord(FILE* f, int32_t* record_size) { static void* buffer = NULL; int32_t buffer_size = 0; int32_t read; if (!record_size) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: record_size can not be NULL"); return NULL; } read = fread(record_size, sizeof(int32_t), 1, f); if (feof(f)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: reached end of file"); return NULL; } if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: error reading record size"); return NULL; } if (buffer_size < *record_size) { if (buffer) buffer = realloc(buffer, *record_size); else buffer = malloc(*record_size); if (buffer == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading a taxonomy file: error allocating memory"); return NULL; } } read = fread(buffer, *record_size, 1, f); if (read != 1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a taxonomy file: error reading a 
record %d, %d", read, *record_size); free(buffer); return NULL; } return buffer; }; static ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon) { ecotxformat_t* raw; int32_t record_length; raw = read_ecorecord(f, &record_length); if (!raw) return NULL; taxon->parent = (ecotx_t*) ((size_t) raw->parent); taxon->taxid = raw->taxid; taxon->rank = raw->rank; taxon->farest = -1; taxon->name = malloc((raw->name_length+1) * sizeof(char)); if (taxon->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading a taxonomy file: error allocating memory"); return NULL; } strncpy(taxon->name, raw->name, raw->name_length); taxon->name[raw->name_length] = 0; // TODO note: this line is probably missing in ROBITaxonomy and source of a bug return taxon; } static econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy) { econameformat_t* raw; int32_t record_length; raw = read_ecorecord(f, &record_length); if (raw == NULL) return NULL; name->is_scientific_name = raw->is_scientific_name; name->name = malloc((raw->name_length + 1) * sizeof(char)); if (name->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon name"); free(raw); return NULL; } strncpy(name->name, raw->names, raw->name_length); name->name[raw->name_length] = 0; name->class_name = malloc((raw->class_length+1) * sizeof(char)); if (name->class_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon class name"); free(name->name); free(raw); return NULL; } strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length); name->class_name[raw->class_length] = 0; name->taxon = taxonomy->taxa->taxon + raw->taxid; return name; } static econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy) { econameformat_t* raw; int32_t record_length; raw = read_ecorecord(f, &record_length); if (raw == NULL) return NULL; name->is_scientific_name = 
raw->is_scientific_name; name->name = malloc((raw->name_length + 1) * sizeof(char)); if (name->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon preferred name"); free(raw); return NULL; } strncpy(name->name, raw->names, raw->name_length); name->name[raw->name_length] = 0; name->class_name = malloc((raw->class_length+1) * sizeof(char)); if (name->class_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon class name"); free(name->name); free(raw); return NULL; } strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length); name->class_name[raw->class_length] = 0; name->taxon = taxonomy->taxa->taxon + raw->taxid; // Add the preferred name in the taxon structure // TODO discuss: couldn't they all use the same pointer? (taxonomy->taxa->taxon + raw->taxid)->preferred_name = malloc((raw->name_length + 1) * sizeof(char)); if ((taxonomy->taxa->taxon + raw->taxid)->preferred_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon preferred name"); free(name->name); free(name->class_name); free(raw); return NULL; } strcpy((taxonomy->taxa->taxon + raw->taxid)->preferred_name, name->name); return name; } static ecorankidx_t* read_ranks_idx(const char* ranks_file_name) { int32_t count; FILE* ranks_file; ecorankidx_t* ranks_index; int32_t i; int32_t rank_length; char* buffer; ranks_file = open_ecorecorddb(ranks_file_name, &count, 0); if (ranks_file==NULL) return NULL; ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * count); if (ranks_index == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxonomy rank structure"); fclose(ranks_file); return NULL; } ranks_index->count = count; for (i=0; i < count; i++) { buffer = read_ecorecord(ranks_file, &rank_length); if (buffer == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading a value in a taxonomy 
file"); fclose(ranks_file); free(ranks_index); return NULL; } ranks_index->label[i] = (char*) malloc(rank_length+1); if (ranks_index->label[i] == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxonomy rank label"); fclose(ranks_file); free(ranks_index); free(buffer); return NULL; } strncpy(ranks_index->label[i], buffer, rank_length); (ranks_index->label[i])[rank_length] = 0; } fclose(ranks_file); return ranks_index; } static ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name) { int32_t count_taxa; int32_t count_local_taxa; FILE* f_taxa; FILE* f_local_taxa; ecotxidx_t* taxa_index; struct ecotxnode* t; int32_t i; int32_t j; f_taxa = open_ecorecorddb(taxa_file_name, &count_taxa, 1); if (f_taxa == NULL) { obidebug(1, "\nError reading taxonomy taxa file"); return NULL; } f_local_taxa = open_ecorecorddb(local_taxa_file_name, &count_local_taxa, 0); taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa)); if (taxa_index == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxonomy structure"); fclose(f_taxa); fclose(f_local_taxa); return NULL; } taxa_index->count = count_taxa + count_local_taxa; taxa_index->ncbi_count = count_taxa; taxa_index->local_count = count_local_taxa; taxa_index->buffer_size = taxa_index->count; taxa_index->max_taxid = 0; printf("Reading %d taxa...\n", count_taxa); for (i=0; itaxon[i])); taxa_index->taxon[i].idx = i; taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent->farest = 0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) taxa_index->max_taxid = taxa_index->taxon[i].taxid; taxa_index->taxon[i].preferred_name = NULL; } if (count_local_taxa > 0) printf("Reading %d local taxa...\n", count_local_taxa); else printf("No local taxa\n"); count_taxa = taxa_index->count; for (; i < count_taxa; i++){ 
readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i])); taxa_index->taxon[i].idx = i; taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent; taxa_index->taxon[i].parent->farest=0; if (taxa_index->taxon[i].taxid > taxa_index->max_taxid) taxa_index->max_taxid = taxa_index->taxon[i].taxid; taxa_index->taxon[i].preferred_name = NULL; } for (i=0; i < count_taxa; i++) { t = taxa_index->taxon+i; if (t->farest == -1) { t->farest=0; while (t->parent != t) { j = t->farest + 1; if (j > t->parent->farest) { t->parent->farest = j; t=t->parent; } else t = taxa_index->taxon; } } } fclose(f_taxa); if (f_local_taxa != NULL) fclose(f_local_taxa); return taxa_index; } static econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy) { int32_t count; FILE* f; econameidx_t* index_names; int32_t i; f = open_ecorecorddb(file_name, &count, 0); if (f == NULL) return NULL; index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count); if (index_names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading taxonomy name file"); return NULL; } index_names->count = count; for (i=0; i < count; i++) { readnext_econame(f, (index_names->names)+i, taxonomy); if ((index_names->names)+i == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy name file"); free(index_names); return NULL; } } fclose(f); return index_names; } static econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy) { int32_t count; FILE* f; econameidx_t* index_names; int32_t i; f = open_ecorecorddb(file_name, &count, 0); if (f == NULL) return NULL; index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count); if (index_names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading taxonomy name file"); return NULL; } index_names->count = count; for (i=0; i < count; i++) { readnext_ecopreferredname(f, (index_names->names)+i, 
taxonomy); if ((index_names->names)+i == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy name file"); free(index_names); return NULL; } } fclose(f); return index_names; } static ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy) { int32_t count; FILE* f; ecomergedidx_t* index_merged_idx; ecomerged_t* merged_idx; int32_t i; int32_t record_length; f = open_ecorecorddb(file_name, &count, 0); if (f == NULL) { obidebug(1, "\nError reading taxonomy name file"); return NULL; } index_merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + (sizeof(ecomerged_t) * count)); if (index_merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reading taxonomy name file"); return NULL; } index_merged_idx->count = count; for (i=0; i < count; i++) { merged_idx = read_ecorecord(f, &record_length); memcpy((index_merged_idx->merged)+i, merged_idx, record_length); if ((index_merged_idx->merged)+i == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy name file"); free(index_merged_idx); return NULL; } } fclose(f); return index_merged_idx; } static int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? 
{ int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t length; // Compute file size file_size = sizeof(int32_t); for (i=0; i < (tax->ranks)->count; i++) { file_size = file_size + sizeof(int32_t); // To store label size file_size = file_size + strlen(((tax->ranks)->label)[i]); // To store label } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file %s", file_name); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank count if (write(file_descriptor, &((tax->ranks)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write ranks for (i=0; i < (tax->ranks)->count; i++) { length = strlen(((tax->ranks)->label)[i]); // Write rank size if (write(file_descriptor, &length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank label if 
(write(file_descriptor, ((tax->ranks)->label)[i], length) < ((ssize_t) length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing an rdx taxonomy file"); return -1; } return 0; } static int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=0; i < (tax->taxa)->ncbi_count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->taxa->taxon[i].name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); 
close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &(tax->taxa->ncbi_count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write records for (i=0; i < (tax->taxa)->ncbi_count; i++) { name_length = strlen(tax->taxa->taxon[i].name); record_size = 4*sizeof(int32_t) + name_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank index if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write parent index if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { 
obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a tdx taxonomy file"); return -1; } return 0; } static int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=(tax->taxa)->ncbi_count; i < (tax->taxa)->count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->taxa->taxon[i].name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.ldx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &((tax->taxa)->local_count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); 
close(file_descriptor); return -1; } // Write records for (i=(tax->taxa)->ncbi_count; i < (tax->taxa)->count; i++) { name_length = strlen(tax->taxa->taxon[i].name); record_size = 4*sizeof(int32_t) + name_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write rank index if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write parent index if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a ldx taxonomy file"); return -1; } return 0; } static int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? 
keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t class_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=0; i < (tax->names)->count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->names->names[i].name); // To store name file_size = file_size + strlen(tax->names->names[i].class_name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &(tax->names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write records for (i=0; i < tax->names->count; i++) { name_length = strlen(tax->names->names[i].name); class_length = 
strlen(tax->names->names[i].class_name); record_size = 4*sizeof(int32_t) + name_length + class_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write if the name is a scientific name if (write(file_descriptor, &(tax->names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write class length if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid index if (write(file_descriptor, &(tax->names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->names->names[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write class if (write(file_descriptor, tax->names->names[i].class_name, class_length) < ((ssize_t) class_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a ndx 
taxonomy file"); return -1; } return 0; } static int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t name_length; int32_t class_length; int32_t record_size; // Compute file size file_size = sizeof(int32_t); // To store record count for (i=0; i < (tax->preferred_names)->count; i++) { file_size = file_size + sizeof(int32_t) * 5; // To store record size, taxid, rank index, parent index, and name length file_size = file_size + strlen(tax->preferred_names->names[i].name); // To store name file_size = file_size + strlen(tax->preferred_names->names[i].class_name); // To store name } // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file"); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write record count if (write(file_descriptor, &(tax->preferred_names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, 
"\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write records for (i=0; i < tax->preferred_names->count; i++) { name_length = strlen(tax->preferred_names->names[i].name); class_length = strlen(tax->preferred_names->names[i].class_name); record_size = 4*sizeof(int32_t) + name_length + class_length; // Write record size if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write if the name is a scientific name if (write(file_descriptor, &(tax->preferred_names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name length if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write class length if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid index if (write(file_descriptor, &(tax->preferred_names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write name if (write(file_descriptor, tax->preferred_names->names[i].name, name_length) < ((ssize_t) name_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write class if (write(file_descriptor, tax->preferred_names->names[i].class_name, class_length) < 
((ssize_t) class_length)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a pdx taxonomy file"); return -1; } return 0; } static int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name) // TODO prefix in taxonomy struct? { int i; char* file_name; int file_descriptor; off_t file_size; char* taxonomy_path; int32_t record_size; // Compute file size file_size = sizeof(int32_t) + (sizeof(int32_t) * 3 * (tax->merged_idx)->count); // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, taxonomy_name); file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a binary taxonomy file name"); return -1; } // Build the file path if (sprintf(file_name, "%s/%s.adx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a binary taxonomy file name"); return -1; } free(taxonomy_path); // Create file file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (file_descriptor < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError creating a binary taxonomy file %s", file_name); free(file_name); return -1; } free(file_name); // Truncate the file to the right size if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError truncating a binary taxonomy file"); close(file_descriptor); return -1; } // Write merged indices count if (write(file_descriptor, &((tax->merged_idx)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } record_size = 2 * sizeof(int32_t); // 
Write merged indices for (i=0; i < (tax->merged_idx)->count; i++) { // Write record size if (write(file_descriptor, &(record_size), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write taxid if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } // Write index corresponding to the taxid in the ecotxidx_t structure if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t))) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError writing in a binary taxonomy file"); close(file_descriptor); return -1; } } // Close file if (close(file_descriptor) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing an adx taxonomy file"); return -1; } return 0; } int read_nodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, char*** rank_names_p, int** parent_taxids_p) { struct dirent* dp; DIR* tax_dir; FILE* file; char* file_name; bool file_found=false; char line[2048]; // TODO large enough? 
char* elt; int buffer_size; int i, n; buffer_size = 10000; // Initialize rank names and parent taxids arrays *parent_taxids_p = malloc(buffer_size * sizeof(int)); if (*parent_taxids_p == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the parent taxids array"); return -1; } *rank_names_p = malloc(buffer_size * sizeof(char*)); if (*rank_names_p == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the rank names array"); free(*parent_taxids_p); return -1; } // Open the taxdum directory tax_dir = opendir(taxdump); if (tax_dir == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxdump directory"); free(*parent_taxids_p); free(*rank_names_p); return -1; } // Look for the 'nodes.dmp' file while ((dp = readdir(tax_dir)) != NULL) { if (strcmp(dp->d_name, "nodes.dmp") == 0) { file_found = true; // Initializing the taxa structure tax->taxa = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * buffer_size); if (tax->taxa == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); free(*parent_taxids_p); free(*rank_names_p); closedir(tax_dir); return -1; } // Allocating the memory for the file name file_name = (char*) malloc((strlen(taxdump) + 11)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); free(*parent_taxids_p); free(*rank_names_p); closedir(tax_dir); return -1; } // Build the file path if (sprintf(file_name, "%s/nodes.dmp", taxdump) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a taxonomy file name for 'nodes.dmp'"); free(*parent_taxids_p); free(*rank_names_p); closedir(tax_dir); free(file_name); return -1; } file = fopen(file_name, "r"); if (file == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxonomy file"); free(*parent_taxids_p); free(*rank_names_p); 
closedir(tax_dir); free(file_name); return -1; } free(file_name); (tax->taxa)->max_taxid = 0; n = 0; while (fgets(line, sizeof(line), file)) { // Enlarge structures if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * buffer_size); if (tax->taxa == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); free(*parent_taxids_p); free(*rank_names_p); fclose(file); closedir(tax_dir); return -1; } *parent_taxids_p = (int*) realloc(*parent_taxids_p, sizeof(int) * buffer_size); if (*parent_taxids_p == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for the parent taxids array"); free(*parent_taxids_p); free(*rank_names_p); fclose(file); closedir(tax_dir); return -1; } *rank_names_p = (char**) realloc(*rank_names_p, sizeof(char*) * buffer_size); if (*rank_names_p == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for the rank names array"); free(*parent_taxids_p); free(*rank_names_p); fclose(file); closedir(tax_dir); return -1; } } // Check for terminal '\n' character (line complete) if (line[strlen(line) - 1] != '\n') { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file"); free(*parent_taxids_p); free(*rank_names_p); fclose(file); closedir(tax_dir); return -1; } (tax->taxa)->taxon[n].idx = n; // Parse 3 first elements separated by '|' elt = strtok(line, "|"); // Remove the last character (tab character) elt[strlen(elt)-1] = '\0'; // First element: taxid (tax->taxa)->taxon[n].taxid = atoi(elt); // Update max taxid if ((tax->taxa)->taxon[n].taxid > (tax->taxa)->max_taxid) (tax->taxa)->max_taxid = (tax->taxa)->taxon[n].taxid; // Initialize farest taxid value (tax->taxa)->taxon[n].farest = -1; i = 1; while (i < 3) { elt = strtok(NULL, "|"); // Remove the first and the last characters (tab 
characters) elt = elt+1; elt[strlen(elt)-1] = '\0'; if (i == 1) (*parent_taxids_p)[n] = atoi(elt); else if (i == 2) { (*rank_names_p)[n] = (char*) malloc((strlen(elt)+1) * sizeof(char)); if ((*rank_names_p)[n] == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxon rank name"); free(*parent_taxids_p); free(*rank_names_p); fclose(file); closedir(tax_dir); return -1; } strcpy((*rank_names_p)[n], elt); } i++; } n++; } // Check that fgets stopped because it reached EOF if (!feof(file)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: file reading was stopped before end of file"); free(*parent_taxids_p); free(*rank_names_p); fclose(file); closedir(tax_dir); return -1; } // Store count (tax->taxa)->count = n; (tax->taxa)->ncbi_count = n; (tax->taxa)->local_count = 0; // Truncate the structure memory to the right size tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (tax->taxa)->count); if (tax->taxa == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for taxonomy structure"); free(*parent_taxids_p); free(*rank_names_p); fclose(file); closedir(tax_dir); return -1; } if (fclose(file) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxdump file"); free(*parent_taxids_p); free(*rank_names_p); closedir(tax_dir); return -1; } } } if (closedir(tax_dir) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxdump directory"); free(*parent_taxids_p); free(*rank_names_p); closedir(tax_dir); return -1; } if ( ! 
file_found) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: could not find 'nodes.dmp' file in taxdump directory"); free(*parent_taxids_p); free(*rank_names_p); return -1; } return 0; } int read_delnodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t** delnodes_p, int32_t* delnodes_count) { struct dirent* dp; DIR* tax_dir; FILE* file; char* file_name; bool file_found=false; char line[2048]; // TODO large enough? char* elt; int buffer_size; int n; int old_taxid; buffer_size = 10000; // Initializing the list of deleted nodes *delnodes_p = (int32_t*) malloc(sizeof(int32_t) * buffer_size); if (*delnodes_p == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the deleted taxids array"); return -1; } tax_dir = opendir(taxdump); if (tax_dir == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxdump directory"); closedir(tax_dir); free(*delnodes_p); return -1; } // Go through taxonomy files while ((dp = readdir(tax_dir)) != NULL) { if (strcmp(dp->d_name, "delnodes.dmp") == 0) { file_found = true; // Allocating the memory for the file name file_name = (char*) malloc((strlen(taxdump) + 14)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); closedir(tax_dir); free(*delnodes_p); return -1; } // Build the file path if (sprintf(file_name, "%s/delnodes.dmp", taxdump) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a taxonomy file name"); closedir(tax_dir); free(*delnodes_p); free(file_name); return -1; } file = fopen(file_name, "r"); if (file == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxonomy file"); closedir(tax_dir); free(file_name); free(*delnodes_p); return -1; } free(file_name); n = 0; while (fgets(line, sizeof(line), file)) { // Check for terminal '\n' character (line complete) if (line[strlen(line) - 1] != '\n') { 
obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file"); fclose(file); closedir(tax_dir); free(*delnodes_p); return -1; } // Get first and only element of the line (the deprecated taxid) elt = strtok(line, "|"); // Remove the last character (tab character) elt[strlen(elt)-1] = '\0'; // First element: old deprecated taxid old_taxid = atoi(elt); // Store the old taxid in the list of deleted taxids // Enlarge array if needed if (n == buffer_size) { buffer_size = buffer_size * 2; (*delnodes_p) = (int32_t*) realloc(*delnodes_p, sizeof(int32_t) * buffer_size); if ((*delnodes_p) == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); fclose(file); closedir(tax_dir); return -1; } } (*delnodes_p)[n] = old_taxid; n++; } // Check that fgets stopped because it reached EOF if (!feof(file)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: file reading was stopped before end of file"); fclose(file); closedir(tax_dir); free(*delnodes_p); return -1; } // Store count *delnodes_count = n; if (fclose(file) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxdump file"); free(*delnodes_p); closedir(tax_dir); return -1; } } } if (closedir(tax_dir) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxdump directory"); free(*delnodes_p); return -1; } if ( ! file_found) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: could not find 'delnodes.dmp' file in taxdump directory"); free(*delnodes_p); return -1; } return 0; } int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnodes, int32_t delnodes_count) { int n, nD, nT; int taxid, old_taxid; ecotx_t* t; struct dirent* dp; DIR* tax_dir; FILE* file; char* file_name; bool file_found=false; char line[2048]; // TODO large enough? 
char* elt; int buffer_size; buffer_size = 10000; tax_dir = opendir(taxdump); if (tax_dir == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxdump directory"); return -1; } // Go through taxonomy files while ((dp = readdir(tax_dir)) != NULL) { if (strcmp(dp->d_name, "merged.dmp") == 0) { file_found = true; // Initializing the merged structure tax->merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); closedir(tax_dir); return -1; } // Allocating the memory for the file name file_name = (char*) malloc((strlen(taxdump) + 12)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); closedir(tax_dir); return -1; } // Build the file path if (sprintf(file_name, "%s/merged.dmp", taxdump) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a taxonomy file name"); closedir(tax_dir); free(file_name); return -1; } file = fopen(file_name, "r"); if (file == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxonomy file"); closedir(tax_dir); free(file_name); return -1; } free(file_name); nT = 0; // to point in current taxa list while merging nD = delnodes_count-1; // to point in deleted taxids list while merging (going from count-1 to 0 because taxids are sorted in descending order) n = 0; // to point in final merged list while merging while (fgets(line, sizeof(line), file)) { // Check for terminal '\n' character (line complete) if (line[strlen(line) - 1] != '\n') { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file"); closedir(tax_dir); fclose(file); return -1; } // Parse the 2 elements separated by '|' // Get first element elt = strtok(line, "|"); // Remove the last character (tab 
character) elt[strlen(elt)-1] = '\0'; // First element: old deprecated taxid old_taxid = atoi(elt); // Get 2nd element: new taxid elt = strtok(NULL, "|"); // Remove the first and the last characters (tab characters) elt = elt+1; elt[strlen(elt)-1] = '\0'; taxid = atoi(elt); // Store the old taxid in the merged_idx ordered taxid list // The merged list is an ordered list of the current taxids, the deprecated taxids that have current references, // and the deleted taxids with no current reference. An element of the list is composed of the taxid, and the index // of the taxon in the taxa structure, or -1 for deleted taxids. // Creating the merged list requires to merge the 3 ordered lists into one. while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) || ((nD >= 0) && (delnodes[nD] < old_taxid))) { if ((tax->taxa)->taxon[nT].taxid < delnodes[nD]) { // Add element from taxa list // Enlarge structure if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); closedir(tax_dir); fclose(file); return -1; } } (tax->merged_idx)->merged[n].taxid = (tax->taxa)->taxon[nT].taxid; (tax->merged_idx)->merged[n].idx = nT; nT++; n++; } else if (delnodes[nD] < (tax->taxa)->taxon[nT].taxid) { // Add element from deleted taxids list // Enlarge structure if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); closedir(tax_dir); fclose(file); return -1; } } (tax->merged_idx)->merged[n].taxid = delnodes[nD]; (tax->merged_idx)->merged[n].idx = -1; 
// The index to tag deleted taxids is -1 nD--; n++; } } // Add the deprecated taxid // Enlarge structure if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); closedir(tax_dir); fclose(file); return -1; } } // Store the deprecated taxid with the index that refers to the new taxid // Find the index of the new taxid t = get_taxon_with_current_taxid(tax, taxid); // Store the old taxid with the index (tax->merged_idx)->merged[n].taxid = old_taxid; (tax->merged_idx)->merged[n].idx = t->idx; n++; } // Check that fgets stopped because it reached EOF if (!feof(file)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: file reading was stopped before end of file"); closedir(tax_dir); fclose(file); return -1; } // Store count (tax->merged_idx)->count = n; // Truncate the structure memory to the right size tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * (tax->merged_idx)->count); if (tax->merged_idx == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a a taxonomy structure"); closedir(tax_dir); fclose(file); return -1; } if (fclose(file) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxdump file"); closedir(tax_dir); return -1; } } } if (closedir(tax_dir) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxdump directory"); closedir(tax_dir); return -1; } if ( ! 
file_found) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: could not find 'merged.dmp' file in taxdump directory"); return -1; } // Free delnodes array, not needed anymore free(delnodes); return 0; } int read_names_dmp(const char* taxdump, OBIDMS_taxonomy_p tax) { int i, j, n; int taxid; struct dirent* dp; DIR* tax_dir; FILE* file; char* file_name; bool file_found=false; char line[2048]; // TODO large enough? char* elt; int buffer_size; buffer_size = 10000; tax_dir = opendir(taxdump); if (tax_dir == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxdump directory"); return -1; } // Go through taxonomy files while ((dp = readdir(tax_dir)) != NULL) { if (strcmp(dp->d_name, "names.dmp") == 0) { file_found = true; // Initializing the names structure tax->names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * buffer_size); if (tax->names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); closedir(tax_dir); return -1; } // Allocating the memory for the file name file_name = (char*) malloc((strlen(taxdump) + 11)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a file name"); closedir(tax_dir); return -1; } // Build the file path if (sprintf(file_name, "%s/names.dmp", taxdump) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building a taxonomy file name"); free(file_name); closedir(tax_dir); return -1; } file = fopen(file_name, "r"); if (file == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem opening a taxonomy file"); free(file_name); closedir(tax_dir); return -1; } free(file_name); n = 0; j = 0; while (fgets(line, sizeof(line), file)) { // Enlarge structures if needed if (n == buffer_size) { buffer_size = buffer_size * 2; tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * buffer_size); if (tax->names == 
NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure"); fclose(file); closedir(tax_dir); return -1; } } // Check for terminal '\n' character (line complete) if (line[strlen(line) - 1] != '\n') { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file"); fclose(file); closedir(tax_dir); return -1; } // Parse 4 first elements separated by '|' elt = strtok(line, "|"); // Remove the last character (tab character) elt[strlen(elt)-1] = '\0'; // First element: taxid taxid = atoi(elt); // Find taxid in taxa structure and store pointer in names structure i = j; while ((i < (tax->taxa)->count) && ((tax->taxa)->taxon[i].taxid != taxid)) i++; if (i == (tax->taxa)->count) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: could not find taxon associated to name when reading taxdump"); fclose(file); closedir(tax_dir); return -1; } j = i; // Because there are several names by taxon but they are in the same order (tax->names)->names[n].taxon = ((tax->taxa)->taxon)+i; i = 1; while (i < 4) { elt = strtok(NULL, "|"); // Remove the first and the last characters (tab characters) elt = elt+1; elt[strlen(elt)-1] = '\0'; if (i == 1) // Name { (tax->names)->names[n].name = (char*) malloc((strlen(elt) + 1) * sizeof(char)); if ((tax->names)->names[n].name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon name"); obi_close_taxonomy(tax); fclose(file); closedir(tax_dir); return -1; } strcpy((tax->names)->names[n].name, elt); } else if (i == 3) // Class name { (tax->names)->names[n].class_name = (char*) malloc((strlen(elt) + 1) * sizeof(char)); if ((tax->names)->names[n].class_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon class name"); fclose(file); closedir(tax_dir); return -1; } strcpy((tax->names)->names[n].class_name, elt); if (strcmp(elt, "scientific name") == 0) { 
(tax->names)->names[n].is_scientific_name = 1; } else (tax->names)->names[n].is_scientific_name = 0; } i++; } n++; } // Check that fgets stopped because it reached EOF if (!feof(file)) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: file reading was stopped before end of file"); fclose(file); closedir(tax_dir); return -1; } // Store count (tax->names)->count = n; // Truncate the structure memory to the right size tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * (tax->names)->count); if (tax->names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a a taxonomy structure"); fclose(file); closedir(tax_dir); return -1; } if (fclose(file) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxdump file"); closedir(tax_dir); return -1; } } } if (closedir(tax_dir) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxdump directory"); closedir(tax_dir); return -1; } if ( ! file_found) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: could not find 'merged.dmp' file in taxdump directory"); return -1; } return 0; } /********************************************************************** * * D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S * **********************************************************************/ int obi_taxonomy_exists(OBIDMS_p dms, const char* taxonomy_name) { char* taxonomy_path; DIR* dir; taxonomy_path = get_taxonomy_path(dms, taxonomy_name); if (taxonomy_path == NULL) return -1; dir = opendir(taxonomy_path); if (dir) { /* Directory exists. */ closedir(dir); return 1; } else if (ENOENT == errno) { /* Directory does not exist. */ return 0; } else { /* opendir() failed for some other reason. 
*/ return -1; } } OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump) { OBIDMS_taxonomy_p tax; char** rank_names=NULL; int* parent_taxids=NULL; int32_t* delnodes=NULL; int32_t delnodes_count; bool already_in; ecotx_t* t; int buffer_size; int i, j; // Initialize taxonomy structure tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t)); if (tax == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a taxonomy structure"); return NULL; } tax->ranks = NULL; tax->taxa = NULL; tax->names = NULL; tax->preferred_names = NULL; tax->merged_idx = NULL; tax->dms = NULL; (tax->tax_name)[0] = '\0'; // TODO check if taxdump path is for a gz file to unzip or a directory // READ NODES.DMP if (read_nodes_dmp(taxdump, tax, &rank_names, &parent_taxids) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem reading 'nodes.dmp'"); obi_close_taxonomy(tax); return NULL; } // READ DELNODES.DMP if (read_delnodes_dmp(taxdump, tax, &delnodes, &delnodes_count) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem reading 'delnodes.dmp'"); obi_close_taxonomy(tax); free(rank_names); free(parent_taxids); return NULL; } // READ MERGED.DMP if (read_merged_dmp(taxdump, tax, delnodes, delnodes_count) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem reading 'merged.dmp'"); obi_close_taxonomy(tax); free(delnodes); free(rank_names); free(parent_taxids); return NULL; } // READ NAMES.DMP if (read_names_dmp(taxdump, tax) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nProblem reading 'names.dmp'"); obi_close_taxonomy(tax); free(rank_names); free(parent_taxids); return NULL; } // Go through data to fill the taxonomy structure // Build rank list // Initialize rank structure buffer_size = 10; tax->ranks = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * buffer_size); if (tax->ranks == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxon rank array"); 
obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } (tax->ranks)->count = 0; for (i=0; i < (tax->taxa)->count; i++) { already_in = false; for (j=0; j < (tax->ranks)->count; j++) { if (strcmp(rank_names[i], ((tax->ranks)->label)[j]) == 0) { already_in = true; break; } } if (!already_in) { // Realloc rank structure if needed if ((tax->ranks)->count == buffer_size) { buffer_size = buffer_size + 10; tax->ranks = (ecorankidx_t*) realloc(tax->ranks, sizeof(ecorankidx_t) + sizeof(char*) * buffer_size); if (tax->ranks == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for taxon ranks"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } } // Store new rank ((tax->ranks)->label)[(tax->ranks)->count] = (char*) malloc((strlen(rank_names[i]) + 1) * sizeof(char)); if (((tax->ranks)->label)[(tax->ranks)->count] == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxon rank names"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } strcpy(((tax->ranks)->label)[(tax->ranks)->count], rank_names[i]); ((tax->ranks)->count)++; } } // Truncate to the number of ranks recorded tax->ranks = (ecorankidx_t*) realloc(tax->ranks, sizeof(ecorankidx_t) + sizeof(char*) * (tax->ranks)->count); if (tax->ranks == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for taxon ranks"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } // Sort in alphabetical order qsort((tax->ranks)->label, (tax->ranks)->count, sizeof(char*), cmp_str); // Associate the taxa with their rank indices for (i=0; i < (tax->taxa)->count; i++) { for (j=0; j < (tax->ranks)->count; j++) { if (strcmp(rank_names[i], ((tax->ranks)->label)[j]) == 0) { ((tax->taxa)->taxon)[i].rank = j; break; } } } // Associate the taxa with their scientific name for (i=0; i < (tax->names)->count; i++) { if 
((tax->names)->names[i].is_scientific_name) { ((tax->names)->names[i].taxon)->name = (char*) malloc((strlen((((tax->names)->names)[i]).name) + 1) * sizeof(char)); if (((tax->names)->names[i].taxon)->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for taxon ranks"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } strcpy(((tax->names)->names[i].taxon)->name, (((tax->names)->names)[i]).name); } } // Sort names in alphabetical order qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names); // Associate the taxa with their parent for (i=0; i < (tax->taxa)->count; i++) { ((tax->taxa)->taxon)[i].parent = get_taxon_with_current_taxid(tax, parent_taxids[i]); if (((tax->taxa)->taxon)[i].parent == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: taxon parent not found"); obi_close_taxonomy(tax); free(parent_taxids); free(rank_names); return NULL; } (((tax->taxa)->taxon)[i].parent)->farest = 0; } // Initialize preferred names to NULL for (i=0; i < (tax->taxa)->count; i++) ((tax->taxa)->taxon)[i].preferred_name = NULL; (tax->taxa)->buffer_size = (tax->taxa)->count; // Compute longest branches (used to compute distances between taxa faster) for (i=0; i < (tax->taxa)->count; i++) { t = (((tax->taxa))->taxon)+i; if (t->farest == -1) { t->farest=0; while (t->parent != t) { j = t->farest + 1; if (j > t->parent->farest) { t->parent->farest = j; t=t->parent; } else t = (tax->taxa)->taxon; } } } // Freeing free(parent_taxids); for (i=0; i < (tax->taxa)->count; i++) free(rank_names[i]); free(rank_names); return tax; } OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names) { OBIDMS_taxonomy_p tax; char* taxonomy_path; char* ranks_file_name; char* taxa_file_name; char* merged_idx_file_name; char* local_taxa_file_name; char* alter_names_file_name; char* pref_names_file_name; int buffer_size; tax = (OBIDMS_taxonomy_p) 
malloc(sizeof(OBIDMS_taxonomy_t)); if (tax == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxonomy structure"); return NULL; } tax->ranks = NULL; tax->taxa = NULL; tax->names = NULL; tax->preferred_names = NULL; tax->merged_idx = NULL; tax->dms = dms; strcpy(tax->tax_name, taxonomy_name); buffer_size = 2048; taxonomy_path = get_taxonomy_path(dms, taxonomy_name); if (taxonomy_path == NULL) return NULL; // Read ranks ranks_file_name = (char*) malloc(buffer_size*sizeof(char)); if (ranks_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for ranks file name"); free(taxonomy_path); free(tax); return NULL; } if (snprintf(ranks_file_name, buffer_size, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building ranks file name"); free(taxonomy_path); free(ranks_file_name); free(tax); return NULL; } tax->ranks = read_ranks_idx(ranks_file_name); if (tax->ranks == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError reading taxonomy ranks file (check taxonomy name spelling)"); free(taxonomy_path); free(ranks_file_name); free(tax); return NULL; } free(ranks_file_name); // Read taxa taxa_file_name = (char*) malloc(buffer_size*sizeof(char)); if (taxa_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxa file name"); free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(taxa_file_name, buffer_size, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building taxa file name"); free(taxonomy_path); free(taxa_file_name); obi_close_taxonomy(tax); return NULL; } local_taxa_file_name = (char*) malloc(buffer_size*sizeof(char)); if (local_taxa_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for taxa file name"); free(taxonomy_path); free(taxa_file_name); 
obi_close_taxonomy(tax); return NULL; } if (snprintf(local_taxa_file_name, buffer_size, "%s/%s.ldx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building local taxa file name"); free(taxonomy_path); free(taxa_file_name); free(local_taxa_file_name); obi_close_taxonomy(tax); return NULL; } tax->taxa = read_taxonomy_idx(taxa_file_name, local_taxa_file_name); if (tax->taxa == NULL) { free(taxonomy_path); free(taxa_file_name); free(local_taxa_file_name); obi_close_taxonomy(tax); return NULL; } free(taxa_file_name); free(local_taxa_file_name); // Read merged index (old and current taxids referring to indices in the taxa structure) merged_idx_file_name = (char*) malloc(buffer_size*sizeof(char)); if (merged_idx_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for merged index file name"); free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(merged_idx_file_name, buffer_size, "%s/%s.adx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building merged index file name"); free(taxonomy_path); free(merged_idx_file_name); obi_close_taxonomy(tax); return NULL; } tax->merged_idx = read_merged_idx(merged_idx_file_name, tax); if (tax->merged_idx == NULL) { free(taxonomy_path); free(merged_idx_file_name); obi_close_taxonomy(tax); return NULL; } free(merged_idx_file_name); // Read preferred names pref_names_file_name = (char*) malloc(buffer_size*sizeof(char)); if (pref_names_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for alternative names file name"); free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(pref_names_file_name, buffer_size, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building alternative names file name"); free(taxonomy_path); free(pref_names_file_name); obi_close_taxonomy(tax); 
return NULL; } tax->preferred_names = read_preferred_names_idx(pref_names_file_name, tax); if (obi_errno) { free(taxonomy_path); free(pref_names_file_name); obi_close_taxonomy(tax); return NULL; } free(pref_names_file_name); if (tax->preferred_names != NULL) fprintf(stderr, "\nPreferred names read"); // Read alternative names if (read_alternative_names) { alter_names_file_name = (char*) malloc(buffer_size*sizeof(char)); if (alter_names_file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for alternative names file name"); free(taxonomy_path); obi_close_taxonomy(tax); return NULL; } if (snprintf(alter_names_file_name, buffer_size, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError building alternative names file name"); free(taxonomy_path); free(alter_names_file_name); obi_close_taxonomy(tax); return NULL; } tax->names = read_names_idx(alter_names_file_name, tax); if (tax->names == NULL) { free(taxonomy_path); free(alter_names_file_name); obi_close_taxonomy(tax); return NULL; } free(alter_names_file_name); } free(taxonomy_path); return tax; } int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name) { char* taxonomy_path; // Build the taxonomy directory path taxonomy_path = get_taxonomy_path(dms, tax_name); if (taxonomy_path == NULL) return -1; // Try to create the directory if (mkdir(taxonomy_path, 00777) < 0) { if (errno == EEXIST) obidebug(1, "\nA taxonomy already exists with this name."); obidebug(1, "\nProblem creating a new taxonomy directory"); free(taxonomy_path); return -1; } free(taxonomy_path); if (write_ranks_idx(dms, tax, tax_name) < 0) return -1; if (write_taxonomy_idx(dms, tax, tax_name) < 0) return -1; if (write_names_idx(dms, tax, tax_name) < 0) return -1; if (write_merged_idx(dms, tax, tax_name) < 0) return -1; // Check if there are local taxa (if so last taxon is local) if ((tax->taxa)->local_count > 0) if 
(write_local_taxonomy_idx(dms, tax, tax_name) < 0) return -1; // Write preferred names if there are some if (tax->preferred_names != NULL) if (write_preferred_names_idx(dms, tax, tax_name) < 0) return -1; return 0; } int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy) { int i; if (taxonomy) { // Update local informations (local taxa and preferred names) if there are any if ((taxonomy->taxa)->local_count > 0) { if (taxonomy->dms == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError closing a taxonomy with local files but no DMS associated (probably read directly from taxdump)"); // TODO discuss } if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0) return -1; } // Write preferred names if there are some if (taxonomy->preferred_names) { if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0) return -1; // Free preferred names for (i=0; i < (taxonomy->preferred_names)->count; i++) { if (((taxonomy->preferred_names)->names[i]).name) free(((taxonomy->preferred_names)->names[i]).name); if (((taxonomy->preferred_names)->names[i]).class_name) free(((taxonomy->preferred_names)->names[i]).class_name); } free(taxonomy->preferred_names); } if (taxonomy->ranks) { for (i=0; i < (taxonomy->ranks)->count; i++) { if ((taxonomy->ranks)->label[i]) free((taxonomy->ranks)->label[i]); } free(taxonomy->ranks); } if (taxonomy->names) { for (i=0; i < (taxonomy->names)->count; i++) { if (((taxonomy->names)->names[i]).name) free(((taxonomy->names)->names[i]).name); if (((taxonomy->names)->names[i]).class_name) free(((taxonomy->names)->names[i]).class_name); } free(taxonomy->names); } if (taxonomy->taxa) { for (i=0; i < (taxonomy->taxa)->count; i++) { if (((taxonomy->taxa)->taxon[i]).name) free(((taxonomy->taxa)->taxon[i]).name); } free(taxonomy->taxa); } if (taxonomy->merged_idx) { free(taxonomy->merged_idx); } free(taxonomy); } return 0; } int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* 
rank_name, int32_t parent_taxid, int32_t min_taxid) { int32_t taxid; ecotx_t* taxon; int i; // econame_t* name_struct; // Enlarge the structure memory for a new taxon tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (((tax->taxa)->count) + 1)); if (tax->taxa == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon"); return -1; } // Compute new taxid that must be equal or greater than 1E7 and greater than the maximum taxid existing in the taxonomy if (min_taxid < MIN_LOCAL_TAXID) min_taxid = MIN_LOCAL_TAXID; if (min_taxid > (tax->taxa)->max_taxid) taxid = min_taxid; else taxid = ((tax->taxa)->max_taxid) + 1; // Fill the ecotx_t node structure taxon = ((tax->taxa)->taxon)+((tax->taxa)->count); taxon->taxid = taxid; taxon->idx = (tax->taxa)->count; taxon->local = true; taxon->name = (char*) malloc((strlen(name) + 1) * sizeof(char)); if (taxon->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon name to add a new taxon"); return -1; } strcpy(taxon->name, name); taxon->rank = -1; for (i=0; i < (tax->ranks)->count; i++) { if (strcmp(rank_name, ((tax->ranks)->label)[i]) == 0) { taxon->rank = i; break; } } if (taxon->rank == -1) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: taxon rank not found when adding a new taxon"); return -1; } taxon->parent = obi_taxo_get_taxon_with_taxid(tax, parent_taxid); if (taxon->parent == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError: taxon parent not found when adding a new taxon"); return -1; } taxon->farest = 0; // Update taxonomy counts etc (tax->taxa)->max_taxid = taxid; ((tax->taxa)->count)++; ((tax->taxa)->local_count)++; (tax->taxa)->buffer_size = (tax->taxa)->count; // // Add new name in names structure // Commented because the new name was not added in the .ndx file in the OBITools1 // // Allocate memory for new name // tax->names = (econameidx_t*) 
realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1)); // if (tax->names == NULL) // { // obi_set_errno(OBI_MALLOC_ERROR); // obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon"); // return -1; // } // // // Add new name // name_struct = (tax->names)->names + ((tax->names)->count); // name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char)); // if (name_struct->name == NULL) // { // obi_set_errno(OBI_MALLOC_ERROR); // obidebug(1, "\nError allocating memory for a taxon name to add a new taxon"); // return -1; // } // strcpy(name_struct->name, name); // name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char)); // if (name_struct->class_name == NULL) // { // obi_set_errno(OBI_MALLOC_ERROR); // obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon"); // return -1; // } // strcpy(name_struct->class_name, "scientific name"); // name_struct->is_scientific_name = true; // name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1; // // // Sort names in alphabetical order // qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names); // // // Update name count // ((tax->names)->count)++; return taxid; } int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name) { ecotx_t* taxon; taxon = obi_taxo_get_taxon_with_taxid(tax, taxid); return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name); } int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name) { econame_t* name_struct; // Free previous preferred name if there is one if (taxon->preferred_name != NULL) free(taxon->preferred_name); taxon->preferred_name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char)); if (taxon->preferred_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a new 
preferred name for a taxon"); return -1; } strcpy(taxon->preferred_name, preferred_name); // Add new name in preferred names structure // Allocate or reallocate memory for new name if (tax->preferred_names == NULL) { tax->preferred_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t)); (tax->preferred_names)->count = 0; } else tax->preferred_names = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->preferred_names)->count + 1)); if (tax->preferred_names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name"); return -1; } // Add new preferred name name_struct = (tax->preferred_names)->names + ((tax->preferred_names)->count); name_struct->name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char)); if (name_struct->name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a new taxon preferred name"); return -1; } strcpy(name_struct->name, preferred_name); name_struct->class_name = (char*) malloc((strlen("preferred name") + 1) * sizeof(char)); if (name_struct->class_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name"); return -1; } strcpy(name_struct->class_name, "preferred name"); name_struct->is_scientific_name = false; name_struct->taxon = taxon; // Sort preferred names in alphabetical order qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names); // Update preferred name count ((tax->preferred_names)->count)++; return 0; } ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx) { ecotx_t* current_taxon; ecotx_t* next_taxon; if (taxon == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError getting the parent of a taxon at a given rank: taxon pointer is NULL"); return NULL; } current_taxon = taxon; next_taxon = 
current_taxon->parent; while ((current_taxon != next_taxon) && // root node (current_taxon->rank != rankidx)) { current_taxon = next_taxon; next_taxon = current_taxon->parent; } if (current_taxon->rank == rankidx) return current_taxon; else return NULL; } ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid) { ecotx_t *current_taxon; ecomerged_t *indexed_taxon; int32_t count; if (taxonomy == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get a taxon with its taxid: taxonomy pointer is NULL"); return NULL; } count = (taxonomy->merged_idx)->count; indexed_taxon = (ecomerged_t*) bsearch((const void *) ((size_t) taxid), (const void *) taxonomy->merged_idx->merged, count, sizeof(ecomerged_t), cmp_taxids_in_ecomerged_t); if (indexed_taxon == NULL) current_taxon = NULL; else if (indexed_taxon->idx == -1) current_taxon = NULL; // TODO discuss what to do when old deleted taxon else current_taxon = (taxonomy->taxa->taxon)+(indexed_taxon->idx); return current_taxon; } int obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid) // TODO discuss that this doesn't work with deprecated taxids { ecotx_t* next_parent; next_parent = taxon->parent; if (taxon == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError checking if a taxon is under another: taxon pointer is NULL"); return -1; } while ((other_taxid != next_parent->taxid) && (strcmp(next_parent->name, "root"))) next_parent = next_parent->parent; if (other_taxid == next_parent->taxid) return 1; else return 0; } ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static int32_t rankindex = -1; if (taxonomy == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the species associated with a taxon: taxonomy pointer is NULL"); return NULL; } if (taxon == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the species associated with a taxon: taxon pointer is NULL"); return NULL; } 
rankindex = rank_label_to_index("species", taxonomy->ranks); if (rankindex < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the species associated with a taxon: error getting rank index"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static int32_t rankindex = -1; if (taxonomy == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the genus associated with a taxon: taxonomy pointer is NULL"); return NULL; } if (taxon == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the genus associated with a taxon: taxon pointer is NULL"); return NULL; } rankindex = rank_label_to_index("genus", taxonomy->ranks); if (rankindex < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the genus associated with a taxon: error getting rank index"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static int32_t rankindex = -1; if (taxonomy == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the family associated with a taxon: taxonomy pointer is NULL"); return NULL; } if (taxon == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the family associated with a taxon: taxon pointer is NULL"); return NULL; } rankindex = rank_label_to_index("family", taxonomy->ranks); if (rankindex < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the family associated with a taxon: error getting rank index"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static int32_t rankindex = -1; if (taxonomy == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the kingdom associated with a taxon: taxonomy 
pointer is NULL"); return NULL; } if (taxon == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the kingdom associated with a taxon: taxon pointer is NULL"); return NULL; } rankindex = rank_label_to_index("kingdom", taxonomy->ranks); if (rankindex < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the kingdom associated with a taxon: error getting rank index"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy) { static int32_t rankindex = -1; if (taxonomy == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the superkingdom associated with a taxon: taxonomy pointer is NULL"); return NULL; } if (taxon == NULL) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the superkingdom associated with a taxon: taxon pointer is NULL"); return NULL; } rankindex = rank_label_to_index("superkingdom", taxonomy->ranks); if (rankindex < 0) { obi_set_errno(OBI_TAXONOMY_ERROR); obidebug(1, "\nError trying to get the superkingdom associated with a taxon: error getting rank index"); return NULL; } return obi_taxo_get_parent_at_rank(taxon, rankindex); } const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks) { return (ranks->label)[rank_idx]; } int obi_taxo_is_taxid_included(OBIDMS_taxonomy_p taxonomy, int32_t* restrict_to_taxids, int32_t count, int32_t taxid) { int i; ecotx_t* taxon; taxon = obi_taxo_get_taxon_with_taxid(taxonomy, taxid); if (taxon) for (i=0; i < count; i++) if ((taxon->taxid == restrict_to_taxids[i]) || (obi_taxo_is_taxon_under_taxid(taxon, restrict_to_taxids[i]))) return 1; return 0; }