2016-03-11 13:56:38 +01:00
|
|
|
/********************************************************************
 * OBIDMS taxonomy header file                                      *
 ********************************************************************/
|
|
|
|
|
|
|
|
/**
 * @file obidms_taxonomy.h
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date March 2nd 2016
 * @brief Header file for the functions handling the reading of binary taxonomy files.
 */
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
|
|
|
#include "obidms.h"
|
|
|
|
|
|
|
|
|
2016-11-03 17:59:21 +01:00
|
|
|
/**
 * @brief Taxid threshold associated with local taxa.
 *
 * NOTE(review): presumably taxids at or above this value are reserved for
 * locally added taxa (see obi_taxonomy_add_local_taxon()) - confirm against
 * the implementation.
 */
#define MIN_LOCAL_TAXID (10000000)

/**
 * @brief Size of the tax_name character buffer in OBIDMS_taxonomy_t.
 */
#define TAX_NAME_LEN (1024)
|
|
|
|
|
|
|
|
|
2016-03-11 13:56:38 +01:00
|
|
|
/**
 * @brief Taxon record made of four 32-bit integers followed by the taxon
 *        name stored inline as a flexible array member.
 *
 * NOTE(review): the "format" suffix and the file-level brief suggest that this
 * mirrors the layout of a taxon record in the binary taxonomy files - confirm.
 */
typedef struct {
    int32_t taxid;        /**< Taxid of the taxon. */
    int32_t rank;         /**< Rank of the taxon (presumably an index in the rank list - confirm). */
    int32_t parent;       /**< Parent of the taxon (taxid or index - TODO confirm which). */
    int32_t name_length;  /**< Length of name (whether a terminator is counted is not visible here). */
    char    name[];       /**< Name characters, stored right after the fixed-size fields. */
} ecotxformat_t;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Taxon node holding a pointer to its parent node and a pointer to
 *        its name.
 */
typedef struct ecotxnode {
    int32_t           taxid;   /**< TODO discuss: this will be the current taxid even if the struct was accessed through a deprecated one. */
    int32_t           rank;    /**< Rank of the taxon (presumably an index in the rank list - confirm). */
    int32_t           farest;  /**< NOTE(review): meaning not visible in this header - confirm with the implementation. */
    int32_t           idx;     /**< Index of the taxon (presumably its position in the taxa array - confirm). */
    struct ecotxnode* parent;  /**< Parent node. */
    char*             name;    /**< Name of the taxon. */
    bool              local;   /**< Flag marking the taxon as local (presumably added locally rather than read from NCBI data - confirm). */
} ecotx_t;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
int32_t count;
|
2016-11-03 17:59:21 +01:00
|
|
|
int32_t ncbi_count;
|
|
|
|
int32_t local_count;
|
2016-03-11 13:56:38 +01:00
|
|
|
int32_t max_taxid;
|
|
|
|
int32_t buffer_size;
|
2017-01-05 14:28:36 +01:00
|
|
|
ecotx_t taxon[];
|
2016-03-11 13:56:38 +01:00
|
|
|
} ecotxidx_t;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief List of rank labels: a counter followed by an inline array of
 *        string pointers (flexible array member).
 */
typedef struct {
    int32_t count;    /**< Number of labels (presumably the number of entries in label[] - confirm). */
    char*   label[];  /**< Rank labels, stored right after the counter. */
} ecorankidx_t;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Name record made of four 32-bit integers followed by character
 *        data stored inline as a flexible array member.
 *
 * NOTE(review): the "format" suffix and the file-level brief suggest that this
 * mirrors the layout of a name record in the binary taxonomy files - confirm.
 */
typedef struct {
    int32_t is_scientific_name;  /**< Flag telling whether the name is a scientific name. */
    int32_t name_length;         /**< Length of the name (presumably within names[] - confirm). */
    int32_t class_length;        /**< Length of the class name (presumably within names[] - confirm). */
    int32_t taxid;               /**< taxid idx */
    char    names[];             /**< Character data, stored right after the fixed-size fields (presumably name then class name - confirm). */
} econameformat_t;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Name entry: a name, its class name, a scientific-name flag and a
 *        pointer to the taxon node it refers to.
 */
typedef struct {
    char*             name;                /**< The name itself. */
    char*             class_name;          /**< Class of the name (possible values are not visible in this header). */
    int32_t           is_scientific_name;  /**< Flag telling whether the name is a scientific name. */
    struct ecotxnode* taxon;               /**< Taxon node associated with the name. */
} econame_t;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
2016-10-27 18:56:11 +02:00
|
|
|
int32_t count;
|
2017-01-05 14:28:36 +01:00
|
|
|
econame_t names[];
|
2016-03-11 13:56:38 +01:00
|
|
|
} econameidx_t;
|
|
|
|
|
|
|
|
|
2017-01-05 14:28:36 +01:00
|
|
|
/**
 * @brief Pair associating a taxid with an index.
 *
 * NOTE(review): the type name and the TODO on ecotx_t.taxid suggest that this
 * maps a merged (deprecated) taxid to the index of the current taxon - confirm.
 */
typedef struct {
    int32_t taxid;  /**< Taxid. */
    int32_t idx;    /**< Index associated with the taxid. */
} ecomerged_t;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
int32_t count;
|
|
|
|
ecomerged_t merged[];
|
|
|
|
} ecomergedidx_t;
|
|
|
|
|
|
|
|
|
2016-03-11 13:56:38 +01:00
|
|
|
typedef struct OBIDMS_taxonomy_t {
|
2017-01-05 14:28:36 +01:00
|
|
|
char tax_name[TAX_NAME_LEN];
|
|
|
|
OBIDMS_p dms;
|
|
|
|
ecomergedidx_t* merged_idx;
|
|
|
|
ecorankidx_t* ranks;
|
|
|
|
econameidx_t* names;
|
|
|
|
ecotxidx_t* taxa;
|
2016-03-11 13:56:38 +01:00
|
|
|
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
|
|
|
|
|
|
|
|
|
|
|
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
|
|
|
|
|
|
|
|
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
|
|
|
|
|
|
|
|
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
|
|
|
|
|
2017-01-06 15:52:21 +01:00
|
|
|
ecotx_t* obi_taxo_get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
2016-03-11 13:56:38 +01:00
|
|
|
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
|
|
|
|
|
|
|
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
|
|
|
|
|
|
|
|
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
|
|
|
|
|
|
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
|
|
|
|
|
|
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
|
|
|
|
|
|
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
|
|
|
|
|
|
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
|
|
|
2016-10-27 18:56:11 +02:00
|
|
|
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
|
|
|
|
|
|
|
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
2016-10-14 17:03:10 +02:00
|
|
|
|
2016-11-03 17:59:21 +01:00
|
|
|
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|