Taxonomy: documentation for all the functions, and fixed bugs when
closing the taxonomy (overwriting of .pdx files, missing freeing, and re-placed a misplaced condition)
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
||||
* @file obidms_taxonomy.h
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date March 2nd 2016
|
||||
* @brief Header file for the functions handling the reading of binary taxonomy files.
|
||||
* @brief Header file for the functions handling the reading and writing of taxonomy files.
|
||||
*/
|
||||
|
||||
|
||||
@ -17,123 +17,384 @@
|
||||
#include "obidms.h"
|
||||
|
||||
|
||||
#define MIN_LOCAL_TAXID (10000000)
|
||||
#define TAX_NAME_LEN (1024)
|
||||
#define MIN_LOCAL_TAXID (10000000) /**< The minimum taxid for a taxon added locally (i.e. not an NCBI taxon).
|
||||
*/
|
||||
#define TAX_NAME_LEN (1024) /**< The maximum length for the taxonomy name.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxon as stored in a .tdx file.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
int32_t rank;
|
||||
int32_t parent;
|
||||
int32_t name_length;
|
||||
char name[];
|
||||
int32_t taxid; /**< Taxid.
|
||||
*/
|
||||
int32_t rank; /**< Rank index.
|
||||
*/
|
||||
int32_t parent; /**< Index, in the taxid index, of the parent node in the taxonomic tree.
|
||||
*/
|
||||
int32_t name_length; /**< Length of the taxon scientific name.
|
||||
*/
|
||||
char name[]; /**< Scientific name of the taxon.
|
||||
*/
|
||||
} ecotxformat_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxon as stored in a taxonomy structure.
|
||||
*/
|
||||
typedef struct ecotxnode {
|
||||
int32_t taxid; // TODO discuss that this is will be the current taxid even if the struct was accessed through a deprecated one
|
||||
int32_t rank;
|
||||
int32_t farest;
|
||||
int32_t idx;
|
||||
struct ecotxnode* parent;
|
||||
char* name; // scientific name
|
||||
char* preferred_name; // preferred name
|
||||
bool local;
|
||||
int32_t taxid; /**< Taxid. // TODO discuss that this is will be the current taxid even if the struct was accessed through a deprecated one
|
||||
*/
|
||||
int32_t rank; /**< Rank index in ecorankidx_t structure.
|
||||
*/
|
||||
int32_t farest; /**< Longest branch length, used to compute distances between taxa faster.
|
||||
*/
|
||||
int32_t idx; /**< Index in the ecotxidx_t structure.
|
||||
*/
|
||||
struct ecotxnode* parent; /**< Pointer on the parent node in the taxonomic tree.
|
||||
*/
|
||||
char* name; /**< Scientific name of the taxon.
|
||||
*/
|
||||
char* preferred_name; /**< Preferred name of the taxon if there is one, otherwise NULL.
|
||||
*/
|
||||
bool local; /**< A boolean indicating whether the taxon is local or not.
|
||||
*/
|
||||
} ecotx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for the taxon index in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
int32_t ncbi_count;
|
||||
int32_t local_count;
|
||||
int32_t max_taxid;
|
||||
int32_t buffer_size;
|
||||
ecotx_t taxon[];
|
||||
int32_t count; /**< Number of taxa.
|
||||
*/
|
||||
int32_t ncbi_count; /**< Number of NCBI taxa.
|
||||
*/
|
||||
int32_t local_count; /**< Number of taxa added locally.
|
||||
*/
|
||||
int32_t max_taxid; /**< Maximum taxid existing in the taxon index.
|
||||
*/
|
||||
int32_t buffer_size; /**< Number of taxa. // TODO kept this but not sure of its use
|
||||
*/
|
||||
ecotx_t taxon[]; /**< Taxon array.
|
||||
*/
|
||||
} ecotxidx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for the rank index in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
char* label[];
|
||||
int32_t count; /**< Number of ranks.
|
||||
*/
|
||||
char* label[]; /**< Array of rank names.
|
||||
*/
|
||||
} ecorankidx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxon name as stored in a .ndx file.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t is_scientific_name;
|
||||
int32_t name_length;
|
||||
int32_t class_length;
|
||||
int32_t taxid; // taxid idx
|
||||
char names[];
|
||||
int32_t is_scientific_name; /**< A boolean indicating whether the name is a scientific name or not.
|
||||
*/
|
||||
int32_t name_length; /**< The name length.
|
||||
*/
|
||||
int32_t class_length; /**< The name class length.
|
||||
*/
|
||||
int32_t taxid; /**< Index of the taxon in the taxid index.
|
||||
*/
|
||||
char names[]; /**< Taxon name and name class concatenated.
|
||||
*/
|
||||
} econameformat_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxon name as stored in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
char* name;
|
||||
char* class_name;
|
||||
int32_t is_scientific_name;
|
||||
struct ecotxnode* taxon;
|
||||
char* name; /**< Taxon name.
|
||||
*/
|
||||
char* class_name; /**< Name class.
|
||||
*/
|
||||
int32_t is_scientific_name; /**< A boolean indicating whether the name is a scientific name or not.
|
||||
*/
|
||||
struct ecotxnode* taxon; /**< Pointer on the taxon in the taxon index.
|
||||
*/
|
||||
} econame_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for the name index in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
econame_t names[];
|
||||
int32_t count; /**< Number of names.
|
||||
*/
|
||||
econame_t names[]; /**< Array of names.
|
||||
*/
|
||||
} econameidx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxid/index pair as stored in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
int32_t idx;
|
||||
int32_t taxid; /**< Taxid.
|
||||
*/
|
||||
int32_t idx; /**< Index of the taxid in the taxon index, -1 if the taxid is deprecated.
|
||||
*/
|
||||
} ecomerged_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a merged taxid index in a taxonomy structure.
|
||||
*
|
||||
* This index includes all deprecated taxids that now refer to different taxids, and
|
||||
* the deprecated taxids that are deleted.
|
||||
*
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
ecomerged_t merged[];
|
||||
int32_t count; /**< Number of taxid/index pairs.
|
||||
*/
|
||||
ecomerged_t merged[]; /**< Array of taxid/index pairs.
|
||||
*/
|
||||
} ecomergedidx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxonomy.
|
||||
*/
|
||||
typedef struct OBIDMS_taxonomy_t {
|
||||
char tax_name[TAX_NAME_LEN];
|
||||
OBIDMS_p dms;
|
||||
ecomergedidx_t* merged_idx;
|
||||
ecorankidx_t* ranks;
|
||||
econameidx_t* names;
|
||||
econameidx_t* preferred_names;
|
||||
ecotxidx_t* taxa;
|
||||
char tax_name[TAX_NAME_LEN]; /**< Taxonomy name.
|
||||
*/
|
||||
OBIDMS_p dms; /**< A pointer on the DMS to which the taxonomy belongs.
|
||||
*/
|
||||
ecomergedidx_t* merged_idx; /**< Merged taxid index.
|
||||
*/
|
||||
ecorankidx_t* ranks; /**< Taxonomic ranks.
|
||||
*/
|
||||
econameidx_t* names; /**< Taxon names.
|
||||
*/
|
||||
econameidx_t* preferred_names; /**< Taxon preferred names (i.e. added locally).
|
||||
*/
|
||||
ecotxidx_t* taxa; /**< Taxa.
|
||||
*/
|
||||
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
||||
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
|
||||
|
||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
|
||||
|
||||
ecotx_t* obi_taxo_get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
||||
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
||||
|
||||
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
|
||||
|
||||
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
||||
|
||||
/**
|
||||
* @brief Function reading an NCBI taxdump and loading its information into a taxonomy structure.
|
||||
*
|
||||
* @param taxdump The path to the taxdump directory.
|
||||
*
|
||||
* @returns A pointer on the read taxonomy structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function reading a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files)
|
||||
* and loading its information into a taxonomy structure.
|
||||
*
|
||||
* @param dms A pointer on the DMS to which the taxonomy belongs.
|
||||
* @param taxonomy_name The name (prefix) of the taxonomy.
|
||||
* @param read_alternative_names A boolean indicating whether names other than scientific and preferred names should be read.
|
||||
*
|
||||
* @returns A pointer on the read taxonomy structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function writing a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files).
|
||||
*
|
||||
* @param dms A pointer on the DMS to which the taxonomy belongs.
|
||||
* @param tax A pointer on the taxonomy structure.
|
||||
* @param tax_name The name (prefix) of the taxonomy.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function closing a taxonomy structure.
|
||||
*
|
||||
* This function writes all changes to the binary files (local taxa and preferred names) and free all allocated memory for the structure.
|
||||
*
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function adding a local taxon to a taxonomy.
|
||||
*
|
||||
* @param tax A pointer on the taxonomy structure.
|
||||
* @param name The taxon scientific name.
|
||||
* @param rank_name The taxon rank name.
|
||||
* @param parent_taxid The taxid of the parent node in the taxonomic tree.
|
||||
* @param min_taxid The minimum taxid to give to the new taxon (the function will choose a new taxid >= min_taxid and >= MIN_LOCAL_TAXID).
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function adding a preferred name to a taxon in a taxonomy, referred to by its taxid.
|
||||
*
|
||||
* @param tax A pointer on the taxonomy structure.
|
||||
* @param taxid The taxid of the taxon that should have a new preferred name.
|
||||
* @param preferred_name The new preferred name.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function adding a preferred name to a taxon in a taxonomy, referred to by the taxon pointer.
|
||||
*
|
||||
* @param tax A pointer on the taxonomy structure.
|
||||
* @param taxon A pointer on the taxon that should have a new preferred name.
|
||||
* @param preferred_name The new preferred name.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at a given rank.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param rankidx The index of the rank wanted.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the wanted rank.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning a taxon given its taxid.
|
||||
*
|
||||
* @param taxonomy A pointer on the taxonomy.
|
||||
* @param taxid The taxid of the taxon.
|
||||
*
|
||||
* @returns A pointer on the wanted taxon.
|
||||
* @retval NULL if no taxon was found with the given taxid.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function checking whether a taxon is under another in the taxonomy tree.
|
||||
*
|
||||
* @param taxon A pointer on the first taxon.
|
||||
* @param other_taxid The taxid of the second taxon.
|
||||
*
|
||||
* @returns A boolean indicating whether the first taxon is under the second taxon in the taxonomy tree.
|
||||
*/
|
||||
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the species level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the species level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the genus level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the genus level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the family level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the family level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the kingdom level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the kingdom level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the superkingdom level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the superkingdom level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
Reference in New Issue
Block a user