/********************************************************************
 * OBIDMS taxonomy functions                                        *
 ********************************************************************/

/**
 * @file obidms_taxonomy.c
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date March 2nd 2016
 * @brief Functions for handling the reading and writing of taxonomy files.
 */
# include <stdlib.h>
# include <stdio.h>
# include <string.h>
# include <stdbool.h>
# include <fcntl.h>
2016-10-27 18:56:11 +02:00
# include <sys/stat.h>
# include <sys/mman.h>
# include <unistd.h>
2016-03-11 13:56:38 +01:00
# include "obidms_taxonomy.h"
# include "obidms.h"
# include "obidebug.h"
# include "obierrno.h"
2016-04-08 15:38:57 +02:00
# include "utils.h"
2016-03-11 13:56:38 +01:00
# define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
2016-10-10 17:04:29 +02:00
2017-01-18 18:22:49 +01:00
/**************************************************************************
*
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* @ brief Internal function comparing two rank names .
*
* @ param label1 A char * pointer on the first rank name .
* @ param label2 A char * * pointer on a second pointer , that second char * pointer being on the second rank name .
* ( making the function usable with an ecorankidx_t structure and functions like bsearch )
*
* @ returns A value < 0 if label1 < label2 ,
* a value > 0 if label1 > label2 ,
* and 0 if label1 = = label2 .
*/
static int cmp_rank_labels ( const void * label1 , const void * label2 ) ;
/**
* @ brief Internal function comparing two taxids , one of them stored in an ecotx_t structure .
*
* @ param ptaxid The first taxid .
* @ param ptaxon A pointer on an ecotx_t structure where the second taxid is stored .
*
* @ returns A value < 0 if taxid1 < taxid2 ,
* a value > 0 if taxid1 > taxid2 ,
* and 0 if taxid1 = = taxid2 .
*/
static int cmp_taxids_in_ecotx_t ( const void * ptaxid , const void * ptaxon ) ;
/**
* @ brief Internal function comparing two taxids , one of them stored in an ecomerged_t structure .
*
* @ param ptaxid The first taxid .
* @ param ptaxon A pointer on an ecomerged_t structure where the second taxid is stored .
*
* @ returns A value < 0 if taxid1 < taxid2 ,
* a value > 0 if taxid1 > taxid2 ,
* and 0 if taxid1 = = taxid2 .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int cmp_taxids_in_ecomerged_t ( const void * ptaxid , const void * ptaxon ) ;
/**
* @ brief Internal function comparing two character strings pointed to by char * * pointers .
*
* @ param s1 A char * * pointer on a second pointer , that second char * pointer being on the first character string .
* @ param s2 A char * * pointer on a second pointer , that second char * pointer being on the second character string .
*
* @ returns A value < 0 if s1 < s2 ,
* a value > 0 if s1 > s2 ,
* and 0 if s1 = = s2 .
*/
static int cmp_str ( const void * s1 , const void * s2 ) ;
/**
* @ brief Internal function comparing two taxon names stored in econame_t structures .
*
* @ param n1 A pointer on the first econame_t structure .
* @ param n2 A pointer on the second econame_t structure .
*
* @ returns A value < 0 if n1 < n2 ,
* a value > 0 if n1 > n2 ,
* and 0 if n1 = = n2 .
*
* @ since 2016
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int cmp_names ( const void * n1 , const void * n2 ) ;
/**
* @brief Internal function returning the ecotx_t structure associated with a taxid.
*
* This function only looks for the taxid in the modern taxonomy , it does not consider deprecated
* and old taxids , unlike obi_taxo_get_taxon_with_taxid ( ) .
*
* @ param taxonomy A pointer on the taxonomy structure .
* @ param taxid The taxid of the taxon wanted .
*
* @ returns A pointer on the ecotx_t structure associated with a taxid .
*
* @ see obi_taxo_get_taxon_with_taxid ( )
*/
static ecotx_t * get_taxon_with_current_taxid ( OBIDMS_taxonomy_p taxonomy , int32_t taxid ) ;
/**
* @ brief Internal function returning the complete path to a taxonomy directory in a DMS .
*
* @ param dms A pointer on the DMS .
* @ param tax_name The name of the taxonomy .
*
* @ returns The complete path to the taxonomy directory .
* @ retval NULL if an error occurred .
*
* @ since 2016
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static char * get_taxonomy_path ( OBIDMS_p dms , const char * tax_name ) ;
/**
* @ brief Internal function returning the index of a rank in an ecorankidx_t structure .
*
* @ param label The name of the rank .
* @ param ranks A pointer on an ecorankidx_t structure .
*
* @ returns The index of a rank in the ecorankidx_t structure .
* @ retval - 1 if the rank was not found .
*/
2017-10-04 15:55:13 +02:00
static int32_t rank_label_to_index ( const char * label , ecorankidx_t * ranks ) ;
2017-01-18 18:22:49 +01:00
/**
* @ brief Internal function opening a binary taxonomy file ( . tdx , . rdx , . ndx , . adx , . pdx , . ldx ) .
*
* @ param file_name The file path .
* @ param count A pointer on an integer that the function will set to the number of records in the file .
* @param abort_on_open_error A boolean indicating whether the function should trigger an error if the file can't be opened.
*
* @ returns The FILE object .
* @ retval NULL if an error occurred or if the file was not found .
*/
static FILE * open_ecorecorddb ( const char * file_name , int32_t * count , int32_t abort_on_open_error ) ;
/**
* @ brief Internal function returning the next record in a binary taxonomy file ( . tdx , . rdx , . ndx , . adx , . pdx , . ldx ) .
*
* @ param f The file object with the offset at the start of a record .
* @ param record_size A pointer on an integer that the function will set to the size of the record .
*
* @ returns A pointer on the read record .
* @ retval NULL if an error occurred .
*/
static void * read_ecorecord ( FILE * f , int32_t * record_size ) ;
/**
* @ brief Internal function reading the next taxon record in a . tdx binary taxonomy file .
*
* @ param f The file object with the offset at the start of a record .
* @ param taxon A pointer on an empty , allocated ecotx_t structure that the function will fill .
*
* @ returns A pointer on the read record .
* @ retval NULL if an error occurred .
*/
static ecotx_t * readnext_ecotaxon ( FILE * f , ecotx_t * taxon ) ;
/**
* @ brief Internal function reading the next taxon name record in a . ndx binary taxonomy file .
*
* @ param f The file object with the offset at the start of a record .
* @ param name A pointer on an empty , allocated econame_t structure that the function will fill .
* @ param taxonomy A pointer on the taxonomy structure .
*
* @ returns A pointer on the read record .
* @ retval NULL if an error occurred .
*/
static econame_t * readnext_econame ( FILE * f , econame_t * name , OBIDMS_taxonomy_p taxonomy ) ;
/**
* @ brief Internal function reading the next taxon preferred name record in a . pdx binary taxonomy file .
*
* @ param f The file object with the offset at the start of a record .
* @ param name A pointer on an empty , allocated econame_t structure that the function will fill .
* @ param taxonomy A pointer on the taxonomy structure .
*
* @ returns A pointer on the read record .
* @ retval NULL if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static econame_t * readnext_ecopreferredname ( FILE * f , econame_t * name , OBIDMS_taxonomy_p taxonomy ) ;
/**
* @ brief Internal function reading a taxonomic ranks ( . rdx ) binary taxonomy file .
*
* @ param ranks_file_name The name of the . rdx file to read .
*
* @ returns A pointer on an ecorankidx_t structure .
* @ retval NULL if an error occurred .
*/
static ecorankidx_t * read_ranks_idx ( const char * ranks_file_name ) ;
/**
* @ brief Internal function reading the taxa ( . tdx , . ldx ) binary taxonomy file .
*
* @ param taxa_file_name The name of the . tdx file to read .
* @ param local_taxa_file_name The name of the . ldx file containing the local taxa to read if there is one .
*
* @ returns A pointer on an ecotxidx_t structure .
* @ retval NULL if an error occurred .
*/
static ecotxidx_t * read_taxonomy_idx ( const char * taxa_file_name , const char * local_taxa_file_name ) ;
/**
* @ brief Internal function reading a names ( . ndx ) binary taxonomy file .
*
* @ param file_name The name of the . ndx file to read .
* @ param taxonomy A pointer on the taxonomy structure .
*
* @ returns A pointer on an econameidx_t structure .
* @ retval NULL if an error occurred .
*/
static econameidx_t * read_names_idx ( const char * file_name , OBIDMS_taxonomy_p taxonomy ) ;
/**
* @ brief Internal function reading a preferred names ( . pdx ) binary taxonomy file .
*
* @ param file_name The name of the . pdx file to read .
* @ param taxonomy A pointer on the taxonomy structure .
*
* @ returns A pointer on an econameidx_t structure .
* @ retval NULL if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static econameidx_t * read_preferred_names_idx ( const char * file_name , OBIDMS_taxonomy_p taxonomy ) ;
/**
* @ brief Internal function reading a merged index ( . adx ) binary taxonomy file .
*
* @ param file_name The name of the . adx file to read .
* @ param taxonomy A pointer on the taxonomy structure .
*
* @ returns A pointer on an ecomergedidx_t structure .
* @ retval NULL if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static ecomergedidx_t * read_merged_idx ( const char * file_name , OBIDMS_taxonomy_p taxonomy ) ;
/**
* @ brief Internal function writing a rank index ( . rdx ) binary taxonomy file .
*
* @ param dms A pointer on the DMS .
* @ param tax A pointer on the taxonomy structure .
* @ param taxonomy_name The name of the taxonomy .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*/
static int write_ranks_idx ( OBIDMS_p dms , OBIDMS_taxonomy_p tax , const char * taxonomy_name ) ;
/**
* @ brief Internal function writing a taxonomy index ( . tdx ) binary taxonomy file .
*
* @ param dms A pointer on the DMS .
* @ param tax A pointer on the taxonomy structure .
* @ param taxonomy_name The name of the taxonomy .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*/
static int write_taxonomy_idx ( OBIDMS_p dms , OBIDMS_taxonomy_p tax , const char * taxonomy_name ) ;
/**
* @ brief Internal function writing a local taxonomy index ( . ldx ) binary taxonomy file .
*
* @ param dms A pointer on the DMS .
* @ param tax A pointer on the taxonomy structure .
* @ param taxonomy_name The name of the taxonomy .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*
* @ since 2016
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int write_local_taxonomy_idx ( OBIDMS_p dms , OBIDMS_taxonomy_p tax , const char * taxonomy_name ) ;
/**
* @ brief Internal function writing a names index ( . ndx ) binary taxonomy file .
*
* @ param dms A pointer on the DMS .
* @ param tax A pointer on the taxonomy structure .
* @ param taxonomy_name The name of the taxonomy .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*/
static int write_names_idx ( OBIDMS_p dms , OBIDMS_taxonomy_p tax , const char * taxonomy_name ) ;
/**
* @ brief Internal function writing a preferred names index ( . pdx ) binary taxonomy file .
*
* @ param dms A pointer on the DMS .
* @ param tax A pointer on the taxonomy structure .
* @ param taxonomy_name The name of the taxonomy .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int write_preferred_names_idx ( OBIDMS_p dms , OBIDMS_taxonomy_p tax , const char * taxonomy_name ) ;
/**
* @ brief Internal function writing a merged index ( . adx ) binary taxonomy file .
*
* @ param dms A pointer on the DMS .
* @ param tax A pointer on the taxonomy structure .
* @ param taxonomy_name The name of the taxonomy .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int write_merged_idx ( OBIDMS_p dms , OBIDMS_taxonomy_p tax , const char * taxonomy_name ) ;
/**
* @ brief Internal function reading the ' nodes . dmp ' file from an NCBI taxdump .
*
* @ param taxdump The path to the taxdump .
* @ param tax A pointer on the taxonomy structure .
* @ param rank_names_p A char * * * pointer on a non allocated char * array where the function will store rank names .
* @ param parent_taxids_p An int * * pointer on a non allocated int array where the function will store parent taxids .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int read_nodes_dmp ( const char * taxdump , OBIDMS_taxonomy_p tax , char * * * rank_names_p , int * * parent_taxids_p ) ;
/**
* @ brief Internal function reading the ' delnodes . dmp ' file from an NCBI taxdump .
*
* @ param taxdump The path to the taxdump .
* @ param tax A pointer on the taxonomy structure .
* @ param delnodes_p An int * * pointer on a non allocated int array where the function will store deleted taxids .
* @ param delnodes_count An int * pointer where the function will store the number of deleted taxids .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int read_delnodes_dmp ( const char * taxdump , OBIDMS_taxonomy_p tax , int32_t * * delnodes_p , int32_t * delnodes_count ) ;
/**
* @ brief Internal function reading the ' merged . dmp ' file from an NCBI taxdump .
*
* @ warning Should be used AFTER read_nodes_dmp ( ) and read_delnodes_dmp ( ) .
*
* The function merges the information about current nodes previously read in read_nodes_dmp ( ) ,
* the information about deleted nodes previously read in read_delnodes_dmp ( ) , and the information read
* in the ' merged . dmp ' file , to build the final merged taxon index in the taxonomy structure .
*
* @ param taxdump The path to the taxdump .
* @ param tax A pointer on the taxonomy structure .
* @ param delnodes An int * pointer containing the deleted taxids .
* @ param delnodes_count The number of deleted taxids .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int read_merged_dmp ( const char * taxdump , OBIDMS_taxonomy_p tax , int32_t * delnodes , int32_t delnodes_count ) ;
/**
* @ brief Internal function reading the ' names . dmp ' file from an NCBI taxdump .
*
* @ param taxdump The path to the taxdump .
* @ param tax A pointer on the taxonomy structure .
*
* @ returns An integer value indicating the success of the operation .
* @ retval 0 on success .
* @ retval - 1 if an error occurred .
*
* @ since January 2017
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int read_names_dmp ( const char * taxdump , OBIDMS_taxonomy_p tax ) ;
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Comparator matching a bare rank name against one element of an array of
 * rank names (the asymmetric key/element form expected by bsearch()).
 *
 * label1: the searched key, a NUL-terminated rank name (char *).
 * label2: a pointer on an array element, i.e. a pointer on a char * rank name.
 *
 * Returns the strcmp() ordering of the two names (< 0, 0 or > 0).
 */
static int cmp_rank_labels(const void *label1, const void *label2)
{
    const char *key = label1;
    const char *const *element = label2;   // only the element is double-indirected

    return strcmp(key, *element);
}
2016-10-10 17:04:29 +02:00
2017-01-06 15:52:21 +01:00
static int cmp_taxids_in_ecotx_t ( const void * ptaxid , const void * ptaxon )
2016-10-27 18:56:11 +02:00
{
ecotx_t * current_taxon = ( ecotx_t * ) ptaxon ;
int32_t taxid = ( int32_t ) ( ( size_t ) ptaxid ) ;
return taxid - current_taxon - > taxid ;
2016-10-10 17:04:29 +02:00
}
2017-01-06 15:52:21 +01:00
static int cmp_taxids_in_ecomerged_t ( const void * ptaxid , const void * ptaxon )
{
ecomerged_t * current_taxon = ( ecomerged_t * ) ptaxon ;
int32_t taxid = ( int32_t ) ( ( size_t ) ptaxid ) ;
return taxid - current_taxon - > taxid ;
}
2016-10-27 18:56:11 +02:00
/*
 * Comparator for two elements of an array of character strings.
 *
 * s1, s2: pointers on array elements, i.e. pointers on char * strings.
 *
 * Returns the strcmp() ordering of the two pointed strings (< 0, 0 or > 0).
 */
static int cmp_str(const void *s1, const void *s2)
{
    const char *const *first = s1;
    const char *const *second = s2;

    return strcmp(*first, *second);
}
2016-03-11 13:56:38 +01:00
2016-10-27 18:56:11 +02:00
static int cmp_names ( const void * n1 , const void * n2 )
2016-03-11 13:56:38 +01:00
{
2016-10-27 18:56:11 +02:00
econame_t name1 = * ( ( econame_t * ) n1 ) ;
econame_t name2 = * ( ( econame_t * ) n2 ) ;
return strcmp ( name1 . name , name2 . name ) ;
2016-03-11 13:56:38 +01:00
}
2017-01-18 18:22:49 +01:00
/*
 * Looks a taxid up in the taxa array of the taxonomy with a binary search.
 *
 * taxonomy: the taxonomy structure whose 'taxa' index is searched.
 * taxid:    the taxid wanted.
 *
 * Returns a pointer on the matching ecotx_t, or NULL when bsearch() finds none.
 * NOTE(review): bsearch() needs the taxa array sorted by taxid; this is
 * assumed here, not checked.
 */
static ecotx_t *get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
{
    // The key carries the taxid value itself, which is how cmp_taxids_in_ecotx_t reads it
    const void *key = (const void *) ((size_t) taxid);
    int32_t     n_taxa = (taxonomy->taxa)->count;

    return (ecotx_t *) bsearch(key,
                               (const void *) taxonomy->taxa->taxon,
                               n_taxa,
                               sizeof(ecotx_t),
                               cmp_taxids_in_ecotx_t);
}
/*
 * Builds the path "<taxonomy directory of the DMS>/<tax_name>".
 *
 * dms:      the DMS, handed to obi_dms_get_full_path() together with TAXONOMY_DIR_NAME.
 * tax_name: the name of the taxonomy, appended after a '/'.
 *
 * Returns a newly allocated string that the caller has to free, or NULL if
 * the directory path could not be obtained, if the allocation failed or if
 * the path could not be formatted.
 */
static char *get_taxonomy_path(OBIDMS_p dms, const char *tax_name)
{
    char  *all_tax_dir_path;
    char  *tax_path;
    size_t tax_path_size;
    int    written;

    all_tax_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME);
    if (all_tax_dir_path == NULL)
        return NULL;

    // directory + '/' + taxonomy name + terminating NUL
    tax_path_size = strlen(all_tax_dir_path) + strlen(tax_name) + 2;

    tax_path = (char *) malloc(tax_path_size * sizeof(char));
    if (tax_path == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, " \n Error allocating memory for taxonomy path ");
        free(all_tax_dir_path);
        return NULL;
    }

    // Bounded write: the format must produce exactly the bytes counted above
    written = snprintf(tax_path, tax_path_size, "%s/%s", all_tax_dir_path, tax_name);
    if ((written < 0) || ((size_t) written >= tax_path_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, " \n Error building taxonomy path ");
        free(all_tax_dir_path);
        free(tax_path);    // was leaked on this path
        return NULL;
    }

    free(all_tax_dir_path);

    return tax_path;
}
2017-10-04 15:55:13 +02:00
/*
 * Finds the position of a rank name in the label array of a rank index,
 * using a binary search with cmp_rank_labels().
 *
 * label: the rank name to look for.
 * ranks: the rank index whose 'label' array (of 'count' elements) is searched.
 *
 * Returns the index of the label in the array, or -1 when it is not found.
 */
static int32_t rank_label_to_index(const char *label, ecorankidx_t *ranks)
{
    char **hit = bsearch(label, ranks->label, ranks->count, sizeof(char *), cmp_rank_labels);

    if (hit == NULL)
        return -1;

    // Distance between the matching element and the start of the array
    return hit - ranks->label;
}
2017-01-18 18:22:49 +01:00
/*
 * Opens a binary taxonomy file and reads its leading int32_t into *count.
 *
 * file_name:           the path of the file to open.
 * count:               set to the int32_t read at the start of the file, or
 *                      to 0 on every failure path.
 * abort_on_open_error: when non-zero, failing to open the file is reported
 *                      as an error (errno set and message logged); when
 *                      zero, NULL is returned silently.
 *
 * Returns the open stream, positioned right after the leading int32_t, or
 * NULL if the file could not be opened or its header could not be read.
 */
static FILE *open_ecorecorddb(const char *file_name,
                              int32_t    *count,
                              int32_t     abort_on_open_error)
{
    FILE  *f;
    size_t n_read;

    f = fopen(file_name, "rb");
    if (f == NULL)
    {
        // No stream was opened: there is nothing to close (fclose(NULL) is undefined)
        *count = 0;
        if (abort_on_open_error)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, " \n Couldn't open a taxonomy file ");
        }
        return NULL;
    }

    n_read = fread(count,
                   sizeof(int32_t),
                   1,
                   f);
    if (n_read != 1)
    {
        // Never hand a partially read count back to the caller
        *count = 0;
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, " \n Error reading taxonomy record size ");
        fclose(f);
        return NULL;
    }

    return f;
}
static void * read_ecorecord ( FILE * f , int32_t * record_size )
2016-03-11 13:56:38 +01:00
{
static void * buffer = NULL ;
int32_t buffer_size = 0 ;
int32_t read ;
if ( ! record_size )
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Error reading a taxonomy file: record_size can not be NULL " ) ;
return NULL ;
}
read = fread ( record_size ,
sizeof ( int32_t ) ,
2016-10-10 17:04:29 +02:00
1 ,
2016-03-11 13:56:38 +01:00
f ) ;
if ( feof ( f ) )
2016-10-27 18:56:11 +02:00
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Error reading a taxonomy file: reached end of file " ) ;
return NULL ;
}
2016-03-11 13:56:38 +01:00
2016-10-10 17:04:29 +02:00
if ( read ! = 1 )
2016-03-11 13:56:38 +01:00
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Error reading a taxonomy file: error reading record size " ) ;
return NULL ;
}
if ( buffer_size < * record_size )
{
if ( buffer )
buffer = realloc ( buffer , * record_size ) ;
else
buffer = malloc ( * record_size ) ;
if ( buffer = = NULL )
{
2016-10-27 18:56:11 +02:00
obi_set_errno ( OBI_MALLOC_ERROR ) ;
2016-03-11 13:56:38 +01:00
obidebug ( 1 , " \n Error reading a taxonomy file: error allocating memory " ) ;
return NULL ;
}
}
read = fread ( buffer ,
* record_size ,
2016-10-10 17:04:29 +02:00
1 ,
2016-03-11 13:56:38 +01:00
f ) ;
2016-10-10 17:04:29 +02:00
if ( read ! = 1 )
2016-03-11 13:56:38 +01:00
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Error reading a taxonomy file: error reading a record %d, %d " , read , * record_size ) ;
2016-10-27 18:56:11 +02:00
free ( buffer ) ;
2016-03-11 13:56:38 +01:00
return NULL ;
}
return buffer ;
} ;
2017-01-18 18:22:49 +01:00
/*
 * Reads the next taxon record of a taxonomy file and copies it into 'taxon'.
 *
 * f:     the stream, positioned at the start of a taxon record.
 * taxon: the structure to fill; its 'name' field receives a newly allocated,
 *        NUL-terminated copy of the name stored in the record.
 *
 * Returns 'taxon', or NULL if the record could not be read or the name could
 * not be allocated.
 *
 * On return, 'parent' still holds the raw value read from the file cast to a
 * pointer (read_taxonomy_idx() turns it into a real pointer afterwards), and
 * 'farest' is set to -1.
 */
static ecotx_t *readnext_ecotaxon(FILE *f, ecotx_t *taxon)
{
    int32_t        raw_size;
    ecotxformat_t *record = read_ecorecord(f, &raw_size);

    if (record == NULL)
        return NULL;

    taxon->parent = (ecotx_t *) ((size_t) record->parent);
    taxon->taxid = record->taxid;
    taxon->rank = record->rank;
    taxon->farest = -1;

    taxon->name = malloc((record->name_length + 1) * sizeof(char));
    if (taxon->name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, " \n Error reading a taxonomy file: error allocating memory ");
        return NULL;
    }

    strncpy(taxon->name, record->name, record->name_length);
    taxon->name[record->name_length] = 0;    // TODO note: this line is probably missing in ROBITaxonomy and source of a bug

    return taxon;
}
2017-01-18 18:22:49 +01:00
/*
 * Reads the next name record of a taxonomy file and copies it into 'name'.
 *
 * f:        the stream, positioned at the start of a name record.
 * name:     the structure to fill; 'name' and 'class_name' receive newly
 *           allocated, NUL-terminated copies of the two strings stored back
 *           to back in the record.
 * taxonomy: the taxonomy structure; the 'taxid' field of the record is used
 *           as an index in its taxa array to set name->taxon.
 *
 * Returns 'name', or NULL if the record could not be read or if an
 * allocation failed (in which case nothing allocated here is left behind).
 */
static econame_t *readnext_econame(FILE *f, econame_t *name, OBIDMS_taxonomy_p taxonomy)
{
    econameformat_t *raw;
    int32_t          record_length;

    // 'raw' points in the static buffer owned by read_ecorecord(): it must never be freed here
    raw = read_ecorecord(f, &record_length);
    if (raw == NULL)
        return NULL;

    name->is_scientific_name = raw->is_scientific_name;

    name->name = malloc((raw->name_length + 1) * sizeof(char));
    if (name->name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, " \n Error allocating memory for a taxon name ");
        return NULL;
    }
    strncpy(name->name, raw->names, raw->name_length);
    name->name[raw->name_length] = 0;

    name->class_name = malloc((raw->class_length + 1) * sizeof(char));
    if (name->class_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, " \n Error allocating memory for a taxon class name ");
        free(name->name);
        name->name = NULL;    // do not leave a dangling pointer in the caller's structure
        return NULL;
    }
    // The class name directly follows the name in the record
    strncpy(name->class_name, (raw->names + raw->name_length), raw->class_length);
    name->class_name[raw->class_length] = 0;

    name->taxon = taxonomy->taxa->taxon + raw->taxid;

    return name;
}
/*
 * Reads the next preferred name record of a taxonomy file, copies it into
 * 'name' and stores a copy of the name in the taxon it refers to.
 *
 * f:        the stream, positioned at the start of a name record.
 * name:     the structure to fill; 'name' and 'class_name' receive newly
 *           allocated, NUL-terminated copies of the two strings stored back
 *           to back in the record.
 * taxonomy: the taxonomy structure; the 'taxid' field of the record is used
 *           as an index in its taxa array to find the taxon, whose
 *           'preferred_name' receives its own allocated copy of the name.
 *
 * Returns 'name', or NULL if the record could not be read or if an
 * allocation failed (in which case nothing allocated here is left behind).
 */
static econame_t *readnext_ecopreferredname(FILE *f, econame_t *name, OBIDMS_taxonomy_p taxonomy)
{
    econameformat_t *raw;
    ecotx_t         *taxon;
    int32_t          record_length;

    // 'raw' points in the static buffer owned by read_ecorecord(): it must never be freed here
    raw = read_ecorecord(f, &record_length);
    if (raw == NULL)
        return NULL;

    name->is_scientific_name = raw->is_scientific_name;

    name->name = malloc((raw->name_length + 1) * sizeof(char));
    if (name->name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, " \n Error allocating memory for a taxon preferred name ");
        return NULL;
    }
    strncpy(name->name, raw->names, raw->name_length);
    name->name[raw->name_length] = 0;

    name->class_name = malloc((raw->class_length + 1) * sizeof(char));
    if (name->class_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, " \n Error allocating memory for a taxon class name ");
        free(name->name);
        name->name = NULL;
        return NULL;
    }
    // The class name directly follows the name in the record
    strncpy(name->class_name, (raw->names + raw->name_length), raw->class_length);
    name->class_name[raw->class_length] = 0;

    taxon = taxonomy->taxa->taxon + raw->taxid;
    name->taxon = taxon;

    // Add the preferred name in the taxon structure  // TODO discuss: couldn't they all use the same pointer?
    taxon->preferred_name = malloc((raw->name_length + 1) * sizeof(char));
    if (taxon->preferred_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, " \n Error allocating memory for a taxon preferred name ");
        free(name->name);
        name->name = NULL;
        free(name->class_name);
        name->class_name = NULL;
        return NULL;
    }
    // name->name holds at most name_length characters, so the copy fits
    strcpy(taxon->preferred_name, name->name);

    return name;
}
2017-01-18 18:22:49 +01:00
static ecorankidx_t * read_ranks_idx ( const char * ranks_file_name )
2016-03-11 13:56:38 +01:00
{
int32_t count ;
FILE * ranks_file ;
ecorankidx_t * ranks_index ;
int32_t i ;
int32_t rank_length ;
char * buffer ;
ranks_file = open_ecorecorddb ( ranks_file_name , & count , 0 ) ;
if ( ranks_file = = NULL )
return NULL ;
2017-01-11 16:36:08 +01:00
ranks_index = ( ecorankidx_t * ) malloc ( sizeof ( ecorankidx_t ) + sizeof ( char * ) * count ) ;
2016-10-27 18:56:11 +02:00
if ( ranks_index = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating memory for taxonomy rank structure " ) ;
fclose ( ranks_file ) ;
return NULL ;
}
2016-03-11 13:56:38 +01:00
ranks_index - > count = count ;
for ( i = 0 ; i < count ; i + + )
{
buffer = read_ecorecord ( ranks_file , & rank_length ) ;
2016-10-27 18:56:11 +02:00
if ( buffer = = NULL )
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Error reading a value in a taxonomy file " ) ;
fclose ( ranks_file ) ;
free ( ranks_index ) ;
return NULL ;
}
2016-03-11 13:56:38 +01:00
ranks_index - > label [ i ] = ( char * ) malloc ( rank_length + 1 ) ;
2016-10-27 18:56:11 +02:00
if ( ranks_index - > label [ i ] = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating memory for taxonomy rank label " ) ;
fclose ( ranks_file ) ;
free ( ranks_index ) ;
free ( buffer ) ;
return NULL ;
}
2016-03-11 13:56:38 +01:00
strncpy ( ranks_index - > label [ i ] , buffer , rank_length ) ;
2016-10-14 17:03:10 +02:00
( ranks_index - > label [ i ] ) [ rank_length ] = 0 ;
2016-03-11 13:56:38 +01:00
}
2016-10-27 18:56:11 +02:00
fclose ( ranks_file ) ;
2016-03-11 13:56:38 +01:00
return ranks_index ;
}
2017-01-18 18:22:49 +01:00
static ecotxidx_t * read_taxonomy_idx ( const char * taxa_file_name , const char * local_taxa_file_name )
2016-03-11 13:56:38 +01:00
{
int32_t count_taxa ;
int32_t count_local_taxa ;
FILE * f_taxa ;
FILE * f_local_taxa ;
ecotxidx_t * taxa_index ;
struct ecotxnode * t ;
int32_t i ;
int32_t j ;
2016-10-27 18:56:11 +02:00
f_taxa = open_ecorecorddb ( taxa_file_name , & count_taxa , 1 ) ;
2016-03-11 13:56:38 +01:00
if ( f_taxa = = NULL )
{
obidebug ( 1 , " \n Error reading taxonomy taxa file " ) ;
return NULL ;
}
f_local_taxa = open_ecorecorddb ( local_taxa_file_name , & count_local_taxa , 0 ) ;
2017-01-11 16:36:08 +01:00
taxa_index = ( ecotxidx_t * ) malloc ( sizeof ( ecotxidx_t ) + sizeof ( ecotx_t ) * ( count_taxa + count_local_taxa ) ) ;
2016-10-27 18:56:11 +02:00
if ( taxa_index = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating memory for taxonomy structure " ) ;
fclose ( f_taxa ) ;
fclose ( f_local_taxa ) ;
return NULL ;
}
2016-03-11 13:56:38 +01:00
taxa_index - > count = count_taxa + count_local_taxa ;
2016-11-03 17:59:21 +01:00
taxa_index - > ncbi_count = count_taxa ;
taxa_index - > local_count = count_local_taxa ;
2016-03-11 13:56:38 +01:00
taxa_index - > buffer_size = taxa_index - > count ;
taxa_index - > max_taxid = 0 ;
printf ( " Reading %d taxa... \n " , count_taxa ) ;
for ( i = 0 ; i < count_taxa ; i + + )
{
readnext_ecotaxon ( f_taxa , & ( taxa_index - > taxon [ i ] ) ) ;
2017-01-16 17:28:20 +01:00
taxa_index - > taxon [ i ] . idx = i ;
2016-03-11 13:56:38 +01:00
taxa_index - > taxon [ i ] . parent = taxa_index - > taxon + ( size_t ) taxa_index - > taxon [ i ] . parent ;
taxa_index - > taxon [ i ] . parent - > farest = 0 ;
if ( taxa_index - > taxon [ i ] . taxid > taxa_index - > max_taxid )
taxa_index - > max_taxid = taxa_index - > taxon [ i ] . taxid ;
2017-01-16 17:28:20 +01:00
taxa_index - > taxon [ i ] . preferred_name = NULL ;
2016-03-11 13:56:38 +01:00
}
if ( count_local_taxa > 0 )
printf ( " Reading %d local taxa... \n " , count_local_taxa ) ;
else
printf ( " No local taxa \n " ) ;
count_taxa = taxa_index - > count ;
for ( ; i < count_taxa ; i + + ) {
readnext_ecotaxon ( f_local_taxa , & ( taxa_index - > taxon [ i ] ) ) ;
2016-10-14 17:03:10 +02:00
taxa_index - > taxon [ i ] . idx = i ;
2016-03-11 13:56:38 +01:00
taxa_index - > taxon [ i ] . parent = taxa_index - > taxon + ( size_t ) taxa_index - > taxon [ i ] . parent ;
taxa_index - > taxon [ i ] . parent - > farest = 0 ;
if ( taxa_index - > taxon [ i ] . taxid > taxa_index - > max_taxid )
taxa_index - > max_taxid = taxa_index - > taxon [ i ] . taxid ;
2017-01-16 17:28:20 +01:00
taxa_index - > taxon [ i ] . preferred_name = NULL ;
2016-03-11 13:56:38 +01:00
}
for ( i = 0 ; i < count_taxa ; i + + )
{
t = taxa_index - > taxon + i ;
if ( t - > farest = = - 1 )
{
t - > farest = 0 ;
while ( t - > parent ! = t )
2017-01-18 18:22:49 +01:00
{
j = t - > farest + 1 ;
if ( j > t - > parent - > farest )
{
t - > parent - > farest = j ;
t = t - > parent ;
}
else
t = taxa_index - > taxon ;
}
}
2017-01-16 17:28:20 +01:00
}
2017-01-18 18:22:49 +01:00
fclose ( f_taxa ) ;
if ( f_local_taxa ! = NULL )
fclose ( f_local_taxa ) ;
return taxa_index ;
2017-01-16 17:28:20 +01:00
}
2017-01-18 18:22:49 +01:00
/**
 * Reads a whole name index from a binary taxonomy name file.
 *
 * The file is opened with open_ecorecorddb(), which also yields the record count.
 * One econame_t is then filled per record by readnext_econame().
 *
 * @param file_name  Path of the name file to read.
 * @param taxonomy   Taxonomy structure handed to readnext_econame() for each record.
 *
 * @returns A malloc'ed econameidx_t whose count field is the record count.
 * @retval NULL if the file could not be opened or the index could not be allocated.
 */
static econameidx_t* read_names_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy)
{
	int32_t       count;
	FILE*         f;
	econameidx_t* index_names;
	int32_t       i;

	f = open_ecorecorddb(file_name, &count, 0);
	if (f == NULL)
		return NULL;

	// Header followed by 'count' name slots
	index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
	if (index_names == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError reading taxonomy name file");
		fclose(f);		// Do not leak the stream on failure
		return NULL;
	}

	index_names->count = count;

	for (i=0; i < count; i++)
	{
		readnext_econame(f, (index_names->names)+i, taxonomy);
		// NOTE(review): this pointer expression can not be NULL, so the branch below is never taken.
		// The return value of readnext_econame() was presumably meant to be tested; confirm its
		// return type before changing the condition.
		if ((index_names->names)+i == NULL)
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError reading taxonomy name file");
			free(index_names);
			fclose(f);	// Do not leak the stream on failure
			return NULL;
		}
	}

	fclose(f);

	return index_names;
}
2017-01-18 18:22:49 +01:00
/**
 * Reads a whole preferred name index from a binary taxonomy file.
 *
 * The file is opened with open_ecorecorddb(), which also yields the record count.
 * One econame_t is then filled per record by readnext_ecopreferredname().
 *
 * @param file_name  Path of the preferred name file to read.
 * @param taxonomy   Taxonomy structure handed to readnext_ecopreferredname() for each record.
 *
 * @returns A malloc'ed econameidx_t whose count field is the record count.
 * @retval NULL if the file could not be opened or the index could not be allocated.
 */
static econameidx_t* read_preferred_names_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy)
{
	int32_t       count;
	FILE*         f;
	econameidx_t* index_names;
	int32_t       i;

	f = open_ecorecorddb(file_name, &count, 0);
	if (f == NULL)
		return NULL;

	// Header followed by 'count' name slots
	index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
	if (index_names == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError reading taxonomy name file");
		fclose(f);		// Do not leak the stream on failure
		return NULL;
	}

	index_names->count = count;

	for (i=0; i < count; i++)
	{
		readnext_ecopreferredname(f, (index_names->names)+i, taxonomy);
		// NOTE(review): this pointer expression can not be NULL, so the branch below is never taken.
		// The return value of readnext_ecopreferredname() was presumably meant to be tested; confirm
		// its return type before changing the condition.
		if ((index_names->names)+i == NULL)
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError reading taxonomy name file");
			free(index_names);
			fclose(f);	// Do not leak the stream on failure
			return NULL;
		}
	}

	fclose(f);

	return index_names;
}
2017-01-18 18:22:49 +01:00
/**
 * Reads a whole merged taxid index from a binary taxonomy file.
 *
 * The file is opened with open_ecorecorddb(), which also yields the record count.
 * Each record returned by read_ecorecord() is copied into one ecomerged_t slot.
 *
 * @param file_name  Path of the merged index file to read.
 * @param taxonomy   Not used by this function.
 *
 * @returns A malloc'ed ecomergedidx_t whose count field is the record count.
 * @retval NULL if the file could not be opened, the index could not be allocated,
 *         or a record could not be read or does not fit in an ecomerged_t.
 */
static ecomergedidx_t* read_merged_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy)
{
	int32_t         count;
	FILE*           f;
	ecomergedidx_t* index_merged_idx;
	ecomerged_t*    merged_idx;
	int32_t         i;
	int32_t         record_length;

	f = open_ecorecorddb(file_name, &count, 0);
	if (f == NULL)
	{
		obidebug(1, "\nError reading taxonomy name file");
		return NULL;
	}

	// Header followed by 'count' merged slots
	index_merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + (sizeof(ecomerged_t) * count));
	if (index_merged_idx == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError reading taxonomy name file");
		fclose(f);		// Do not leak the stream on failure
		return NULL;
	}

	index_merged_idx->count = count;

	for (i=0; i < count; i++)
	{
		merged_idx = read_ecorecord(f, &record_length);

		// The record must exist and fit in one slot before it is copied:
		// a NULL source or an oversized length would make memcpy read or write out of bounds.
		if ((merged_idx == NULL) || (record_length < 0) || (((size_t) record_length) > sizeof(ecomerged_t)))
		{
			obi_set_errno(OBI_TAXONOMY_ERROR);
			obidebug(1, "\nError reading taxonomy name file");
			free(index_merged_idx);
			fclose(f);	// Do not leak the stream on failure
			return NULL;
		}

		memcpy((index_merged_idx->merged)+i, merged_idx, record_length);
	}

	fclose(f);

	return index_merged_idx;
}
2017-01-18 18:22:49 +01:00
/**
 * Writes the rank labels of a taxonomy in the binary file "<taxonomy path>/<taxonomy_name>.rdx".
 *
 * File layout, in writing order:
 *   - int32_t : rank count
 *   - for each rank : int32_t label length, then the label characters (without terminating '\0')
 *
 * The file is opened with O_CREAT | O_EXCL, so the call fails if the file already exists.
 *
 * @param dms            DMS handed to get_taxonomy_path() to locate the taxonomy directory.
 * @param tax            Taxonomy whose ranks are written.
 * @param taxonomy_name  Name of the taxonomy, used for the directory lookup and as file prefix.
 *
 * @retval 0 if the file was written and closed.
 * @retval -1 if an error occurred.
 */
static int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)  // TODO prefix in taxonomy struct?
{
	int     i;
	char*   file_name;
	int     file_descriptor;
	off_t   file_size;
	char*   taxonomy_path;
	int32_t length;

	// Compute file size
	file_size = sizeof(int32_t);	// To store rank count
	for (i=0; i < (tax->ranks)->count; i++)
	{
		file_size = file_size + sizeof(int32_t);					// To store label size
		file_size = file_size + strlen(((tax->ranks)->label)[i]);	// To store label
	}

	// Build the taxonomy directory path
	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
	if (taxonomy_path == NULL)	// NOTE(review): assumes failure is reported by a NULL return; confirm
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError getting the taxonomy directory path");
		return -1;
	}

	// + 6 : '/', the 4 characters of the extension and the terminating '\0'
	file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
		free(taxonomy_path);
		return -1;
	}

	// Build the file path
	if (sprintf(file_name, "%s/%s.rdx", taxonomy_path, taxonomy_name) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError building a binary taxonomy file name");
		free(taxonomy_path);
		free(file_name);
		return -1;
	}

	free(taxonomy_path);

	// Create file
	file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
	if (file_descriptor < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError creating a binary taxonomy file %s", file_name);
		free(file_name);
		return -1;
	}

	free(file_name);

	// Truncate the file to the right size
	if (ftruncate(file_descriptor, file_size) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError truncating a binary taxonomy file");
		close(file_descriptor);
		return -1;
	}

	// Write rank count
	if (write(file_descriptor, &((tax->ranks)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		goto write_error;

	// Write ranks
	for (i=0; i < (tax->ranks)->count; i++)
	{
		length = strlen(((tax->ranks)->label)[i]);

		// Write rank size
		if (write(file_descriptor, &length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write rank label
		if (write(file_descriptor, ((tax->ranks)->label)[i], length) < ((ssize_t) length))
			goto write_error;
	}

	// Close file
	if (close(file_descriptor) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError closing an rdx taxonomy file");
		return -1;
	}

	return 0;

	// Common exit for any failed or short write
write_error:
	obi_set_errno(OBI_TAXONOMY_ERROR);
	obidebug(1, "\nError writing in a binary taxonomy file");
	close(file_descriptor);
	return -1;
}
2017-01-18 18:22:49 +01:00
/**
 * Writes the first ncbi_count taxa of a taxonomy in the binary file
 * "<taxonomy path>/<taxonomy_name>.tdx".
 *
 * File layout, in writing order:
 *   - int32_t : record count (ncbi_count)
 *   - for each taxon : int32_t record size (4 int32_t + name length), int32_t taxid,
 *     int32_t rank index, int32_t index of the parent taxon, int32_t name length,
 *     then the name characters (without terminating '\0')
 *
 * The file is opened with O_CREAT | O_EXCL, so the call fails if the file already exists.
 *
 * @param dms            DMS handed to get_taxonomy_path() to locate the taxonomy directory.
 * @param tax            Taxonomy whose taxa are written.
 * @param taxonomy_name  Name of the taxonomy, used for the directory lookup and as file prefix.
 *
 * @retval 0 if the file was written and closed.
 * @retval -1 if an error occurred.
 */
static int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)  // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
	int     i;
	char*   file_name;
	int     file_descriptor;
	off_t   file_size;
	char*   taxonomy_path;
	int32_t name_length;
	int32_t record_size;

	// Compute file size
	file_size = sizeof(int32_t);	// To store record count
	for (i=0; i < (tax->taxa)->ncbi_count; i++)
	{
		file_size = file_size + sizeof(int32_t) * 5;				// To store record size, taxid, rank index, parent index, and name length
		file_size = file_size + strlen(tax->taxa->taxon[i].name);	// To store name
	}

	// Build the taxonomy directory path
	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
	if (taxonomy_path == NULL)	// NOTE(review): assumes failure is reported by a NULL return; confirm
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError getting the taxonomy directory path");
		return -1;
	}

	// + 6 : '/', the 4 characters of the extension and the terminating '\0'
	file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
		free(taxonomy_path);
		return -1;
	}

	// Build the file path
	if (sprintf(file_name, "%s/%s.tdx", taxonomy_path, taxonomy_name) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError building a binary taxonomy file name");
		free(taxonomy_path);
		free(file_name);
		return -1;
	}

	free(taxonomy_path);

	// Create file
	file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
	if (file_descriptor < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError creating a binary taxonomy file");
		free(file_name);
		return -1;
	}

	free(file_name);

	// Truncate the file to the right size
	if (ftruncate(file_descriptor, file_size) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError truncating a binary taxonomy file");
		close(file_descriptor);
		return -1;
	}

	// Write record count
	if (write(file_descriptor, &(tax->taxa->ncbi_count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		goto write_error;

	// Write records
	for (i=0; i < (tax->taxa)->ncbi_count; i++)
	{
		name_length = strlen(tax->taxa->taxon[i].name);
		record_size = 4*sizeof(int32_t) + name_length;	// Everything following the record size field

		// Write record size
		if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write taxid
		if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write rank index
		if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write parent index
		if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write name length
		if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write name
		if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length))
			goto write_error;
	}

	// Close file
	if (close(file_descriptor) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError closing a tdx taxonomy file");
		return -1;
	}

	return 0;

	// Common exit for any failed or short write
write_error:
	obi_set_errno(OBI_TAXONOMY_ERROR);
	obidebug(1, "\nError writing in a binary taxonomy file");
	close(file_descriptor);
	return -1;
}
2016-10-14 17:03:10 +02:00
2017-01-18 18:22:49 +01:00
/**
 * Writes the taxa stored after the first ncbi_count ones (indices ncbi_count to count-1)
 * in the binary file "<taxonomy path>/<taxonomy_name>.ldx".
 *
 * File layout, in writing order:
 *   - int32_t : record count (local_count)
 *   - for each taxon : int32_t record size (4 int32_t + name length), int32_t taxid,
 *     int32_t rank index, int32_t index of the parent taxon, int32_t name length,
 *     then the name characters (without terminating '\0')
 *
 * Unlike the other index files, this one is opened without O_EXCL: an existing file is
 * reused, resized with ftruncate() and overwritten from its beginning.
 *
 * @param dms            DMS handed to get_taxonomy_path() to locate the taxonomy directory.
 * @param tax            Taxonomy whose local taxa are written.
 * @param taxonomy_name  Name of the taxonomy, used for the directory lookup and as file prefix.
 *
 * @retval 0 if the file was written and closed.
 * @retval -1 if an error occurred.
 */
static int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)  // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
	int     i;
	char*   file_name;
	int     file_descriptor;
	off_t   file_size;
	char*   taxonomy_path;
	int32_t name_length;
	int32_t record_size;

	// Compute file size
	file_size = sizeof(int32_t);	// To store record count
	for (i=(tax->taxa)->ncbi_count; i < (tax->taxa)->count; i++)
	{
		file_size = file_size + sizeof(int32_t) * 5;				// To store record size, taxid, rank index, parent index, and name length
		file_size = file_size + strlen(tax->taxa->taxon[i].name);	// To store name
	}

	// Build the taxonomy directory path
	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
	if (taxonomy_path == NULL)	// NOTE(review): assumes failure is reported by a NULL return; confirm
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError getting the taxonomy directory path");
		return -1;
	}

	// + 6 : '/', the 4 characters of the extension and the terminating '\0'
	file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
		free(taxonomy_path);
		return -1;
	}

	// Build the file path
	if (sprintf(file_name, "%s/%s.ldx", taxonomy_path, taxonomy_name) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError building a binary taxonomy file name");
		free(taxonomy_path);
		free(file_name);
		return -1;
	}

	free(taxonomy_path);

	// Create file (or reuse an existing one: no O_EXCL here)
	file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777);
	if (file_descriptor < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError creating a binary taxonomy file");
		free(file_name);
		return -1;
	}

	free(file_name);

	// Truncate the file to the right size
	if (ftruncate(file_descriptor, file_size) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError truncating a binary taxonomy file");
		close(file_descriptor);
		return -1;
	}

	// Write record count
	if (write(file_descriptor, &((tax->taxa)->local_count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		goto write_error;

	// Write records
	for (i=(tax->taxa)->ncbi_count; i < (tax->taxa)->count; i++)
	{
		name_length = strlen(tax->taxa->taxon[i].name);
		record_size = 4*sizeof(int32_t) + name_length;	// Everything following the record size field

		// Write record size
		if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write taxid
		if (write(file_descriptor, &(tax->taxa->taxon[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write rank index
		if (write(file_descriptor, &(tax->taxa->taxon[i].rank), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write parent index
		if (write(file_descriptor, &((tax->taxa->taxon[i].parent)->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write name length
		if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write name
		if (write(file_descriptor, tax->taxa->taxon[i].name, name_length) < ((ssize_t) name_length))
			goto write_error;
	}

	// Close file
	if (close(file_descriptor) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError closing a ldx taxonomy file");
		return -1;
	}

	return 0;

	// Common exit for any failed or short write
write_error:
	obi_set_errno(OBI_TAXONOMY_ERROR);
	obidebug(1, "\nError writing in a binary taxonomy file");
	close(file_descriptor);
	return -1;
}
2017-01-18 18:22:49 +01:00
/**
 * Writes the names of a taxonomy in the binary file "<taxonomy path>/<taxonomy_name>.ndx".
 *
 * File layout, in writing order:
 *   - int32_t : record count
 *   - for each name : int32_t record size (4 int32_t + name length + class length),
 *     int32_t is_scientific_name, int32_t name length, int32_t class length,
 *     int32_t index of the associated taxon, then the name characters and the class name
 *     characters (both without terminating '\0')
 *
 * The file is opened with O_CREAT | O_EXCL, so the call fails if the file already exists.
 *
 * @param dms            DMS handed to get_taxonomy_path() to locate the taxonomy directory.
 * @param tax            Taxonomy whose names are written.
 * @param taxonomy_name  Name of the taxonomy, used for the directory lookup and as file prefix.
 *
 * @retval 0 if the file was written and closed.
 * @retval -1 if an error occurred.
 */
static int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)  // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
	int     i;
	char*   file_name;
	int     file_descriptor;
	off_t   file_size;
	char*   taxonomy_path;
	int32_t name_length;
	int32_t class_length;
	int32_t record_size;

	// Compute file size
	file_size = sizeof(int32_t);	// To store record count
	for (i=0; i < (tax->names)->count; i++)
	{
		file_size = file_size + sizeof(int32_t) * 5;						// To store record size, scientific name flag, name length, class length, and taxon index
		file_size = file_size + strlen(tax->names->names[i].name);			// To store name
		file_size = file_size + strlen(tax->names->names[i].class_name);	// To store class name
	}

	// Build the taxonomy directory path
	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
	if (taxonomy_path == NULL)	// NOTE(review): assumes failure is reported by a NULL return; confirm
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError getting the taxonomy directory path");
		return -1;
	}

	// + 6 : '/', the 4 characters of the extension and the terminating '\0'
	file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
		free(taxonomy_path);
		return -1;
	}

	// Build the file path
	if (sprintf(file_name, "%s/%s.ndx", taxonomy_path, taxonomy_name) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError building a binary taxonomy file name");
		free(taxonomy_path);
		free(file_name);
		return -1;
	}

	free(taxonomy_path);

	// Create file
	file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
	if (file_descriptor < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError creating a binary taxonomy file");
		free(file_name);
		return -1;
	}

	free(file_name);

	// Truncate the file to the right size
	if (ftruncate(file_descriptor, file_size) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError truncating a binary taxonomy file");
		close(file_descriptor);
		return -1;
	}

	// Write record count
	if (write(file_descriptor, &(tax->names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		goto write_error;

	// Write records
	for (i=0; i < tax->names->count; i++)
	{
		name_length = strlen(tax->names->names[i].name);
		class_length = strlen(tax->names->names[i].class_name);
		record_size = 4*sizeof(int32_t) + name_length + class_length;	// Everything following the record size field

		// Write record size
		if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write if the name is a scientific name
		if (write(file_descriptor, &(tax->names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write name length
		if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write class length
		if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write taxid index
		if (write(file_descriptor, &(tax->names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write name
		if (write(file_descriptor, tax->names->names[i].name, name_length) < ((ssize_t) name_length))
			goto write_error;

		// Write class
		if (write(file_descriptor, tax->names->names[i].class_name, class_length) < ((ssize_t) class_length))
			goto write_error;
	}

	// Close file
	if (close(file_descriptor) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError closing a ndx taxonomy file");
		return -1;
	}

	return 0;

	// Common exit for any failed or short write
write_error:
	obi_set_errno(OBI_TAXONOMY_ERROR);
	obidebug(1, "\nError writing in a binary taxonomy file");
	close(file_descriptor);
	return -1;
}
2016-10-27 18:56:11 +02:00
2017-01-18 18:22:49 +01:00
/**
 * Writes the preferred names of a taxonomy in the binary file
 * "<taxonomy path>/<taxonomy_name>.pdx".
 *
 * File layout, in writing order:
 *   - int32_t : record count
 *   - for each name : int32_t record size (4 int32_t + name length + class length),
 *     int32_t is_scientific_name, int32_t name length, int32_t class length,
 *     int32_t index of the associated taxon, then the name characters and the class name
 *     characters (both without terminating '\0')
 *
 * This file is opened without O_EXCL: an existing file is reused, resized with ftruncate()
 * and overwritten from its beginning.
 *
 * @param dms            DMS handed to get_taxonomy_path() to locate the taxonomy directory.
 * @param tax            Taxonomy whose preferred names are written.
 * @param taxonomy_name  Name of the taxonomy, used for the directory lookup and as file prefix.
 *
 * @retval 0 if the file was written and closed.
 * @retval -1 if an error occurred.
 */
static int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)  // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
	int     i;
	char*   file_name;
	int     file_descriptor;
	off_t   file_size;
	char*   taxonomy_path;
	int32_t name_length;
	int32_t class_length;
	int32_t record_size;

	// Compute file size
	file_size = sizeof(int32_t);	// To store record count
	for (i=0; i < (tax->preferred_names)->count; i++)
	{
		file_size = file_size + sizeof(int32_t) * 5;								// To store record size, scientific name flag, name length, class length, and taxon index
		file_size = file_size + strlen(tax->preferred_names->names[i].name);		// To store name
		file_size = file_size + strlen(tax->preferred_names->names[i].class_name);	// To store class name
	}

	// Build the taxonomy directory path
	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
	if (taxonomy_path == NULL)	// NOTE(review): assumes failure is reported by a NULL return; confirm
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError getting the taxonomy directory path");
		return -1;
	}

	// + 6 : '/', the 4 characters of the extension and the terminating '\0'
	file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
		free(taxonomy_path);
		return -1;
	}

	// Build the file path
	if (sprintf(file_name, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError building a binary taxonomy file name");
		free(taxonomy_path);
		free(file_name);
		return -1;
	}

	free(taxonomy_path);

	// Create file (or reuse an existing one: no O_EXCL here)
	file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777);
	if (file_descriptor < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError creating a binary taxonomy file");
		free(file_name);
		return -1;
	}

	free(file_name);

	// Truncate the file to the right size
	if (ftruncate(file_descriptor, file_size) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError truncating a binary taxonomy file");
		close(file_descriptor);
		return -1;
	}

	// Write record count
	if (write(file_descriptor, &(tax->preferred_names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		goto write_error;

	// Write records
	for (i=0; i < tax->preferred_names->count; i++)
	{
		name_length = strlen(tax->preferred_names->names[i].name);
		class_length = strlen(tax->preferred_names->names[i].class_name);
		record_size = 4*sizeof(int32_t) + name_length + class_length;	// Everything following the record size field

		// Write record size
		if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write if the name is a scientific name
		if (write(file_descriptor, &(tax->preferred_names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write name length
		if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write class length
		if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write taxid index
		if (write(file_descriptor, &(tax->preferred_names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write name
		if (write(file_descriptor, tax->preferred_names->names[i].name, name_length) < ((ssize_t) name_length))
			goto write_error;

		// Write class
		if (write(file_descriptor, tax->preferred_names->names[i].class_name, class_length) < ((ssize_t) class_length))
			goto write_error;
	}

	// Close file
	if (close(file_descriptor) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError closing a pdx taxonomy file");
		return -1;
	}

	return 0;

	// Common exit for any failed or short write
write_error:
	obi_set_errno(OBI_TAXONOMY_ERROR);
	obidebug(1, "\nError writing in a binary taxonomy file");
	close(file_descriptor);
	return -1;
}
2017-01-18 18:22:49 +01:00
/**
 * Writes the merged taxid index of a taxonomy in the binary file
 * "<taxonomy path>/<taxonomy_name>.adx".
 *
 * File layout, in writing order:
 *   - int32_t : record count
 *   - for each record : int32_t record size (2 int32_t), int32_t taxid,
 *     int32_t index stored with that taxid in the merged structure
 *
 * The file is opened with O_CREAT | O_EXCL, so the call fails if the file already exists.
 *
 * @param dms            DMS handed to get_taxonomy_path() to locate the taxonomy directory.
 * @param tax            Taxonomy whose merged index is written.
 * @param taxonomy_name  Name of the taxonomy, used for the directory lookup and as file prefix.
 *
 * @retval 0 if the file was written and closed.
 * @retval -1 if an error occurred.
 */
static int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)  // TODO prefix in taxonomy struct?
{
	int     i;
	char*   file_name;
	int     file_descriptor;
	off_t   file_size;
	char*   taxonomy_path;
	int32_t record_size;

	// Compute file size: record count, then 3 int32_t (record size, taxid, index) per record
	file_size = sizeof(int32_t) + (sizeof(int32_t) * 3 * (tax->merged_idx)->count);

	// Build the taxonomy directory path
	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
	if (taxonomy_path == NULL)	// NOTE(review): assumes failure is reported by a NULL return; confirm
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError getting the taxonomy directory path");
		return -1;
	}

	// + 6 : '/', the 4 characters of the extension and the terminating '\0'
	file_name = (char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
		free(taxonomy_path);
		return -1;
	}

	// Build the file path
	if (sprintf(file_name, "%s/%s.adx", taxonomy_path, taxonomy_name) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError building a binary taxonomy file name");
		free(taxonomy_path);
		free(file_name);
		return -1;
	}

	free(taxonomy_path);

	// Create file
	file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
	if (file_descriptor < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError creating a binary taxonomy file %s", file_name);
		free(file_name);
		return -1;
	}

	free(file_name);

	// Truncate the file to the right size
	if (ftruncate(file_descriptor, file_size) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError truncating a binary taxonomy file");
		close(file_descriptor);
		return -1;
	}

	// Write merged indices count
	if (write(file_descriptor, &((tax->merged_idx)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
		goto write_error;

	// Same size for every record: taxid and index
	record_size = 2 * sizeof(int32_t);

	// Write merged indices
	for (i=0; i < (tax->merged_idx)->count; i++)
	{
		// Write record size
		if (write(file_descriptor, &(record_size), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write taxid
		if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;

		// Write index corresponding to the taxid in the ecotxidx_t structure
		if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
			goto write_error;
	}

	// Close file
	if (close(file_descriptor) < 0)
	{
		obi_set_errno(OBI_TAXONOMY_ERROR);
		obidebug(1, "\nError closing an adx taxonomy file");
		return -1;
	}

	return 0;

	// Common exit for any failed or short write
write_error:
	obi_set_errno(OBI_TAXONOMY_ERROR);
	obidebug(1, "\nError writing in a binary taxonomy file");
	close(file_descriptor);
	return -1;
}
2016-10-27 18:56:11 +02:00
2017-01-11 16:36:08 +01:00
/**
 * @brief Reads the 'nodes.dmp' file of a taxdump directory.
 *
 * For the line at index n of the file:
 *   - the taxid is stored in taxon n of a newly allocated taxa structure
 *     (with idx = n and farest = -1),
 *   - the parent taxid is stored in (*parent_taxids_p)[n],
 *   - a newly allocated copy of the rank name is stored in (*rank_names_p)[n].
 *
 * On success, tax->taxa points to the new taxa structure, truncated to the number of
 * taxa read, and the two arrays (which are not truncated) are handed over to the caller.
 * On failure, everything allocated by the function is freed, *rank_names_p and
 * *parent_taxids_p are set to NULL and tax->taxa is left untouched, so that the caller
 * never receives a half-built structure whose count was not set.
 *
 * @param taxdump The path to the taxdump directory.
 * @param tax A pointer on the taxonomy structure being built.
 * @param rank_names_p A pointer on the array of rank names, allocated and filled by the function.
 * @param parent_taxids_p A pointer on the array of parent taxids, allocated and filled by the function.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int read_nodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, char*** rank_names_p, int** parent_taxids_p)
{
    struct dirent* dp;
    DIR*           tax_dir;
    FILE*          file = NULL;
    char*          file_name;
    bool           file_found = false;
    char           line[2048];  // TODO large enough?
    char*          elt;
    size_t         len;
    ecotxidx_t*    taxa = NULL;
    void*          enlarged;
    int            buffer_size = 10000;
    int            new_size;
    int            i;
    int            n = 0;       // Number of lines completely read, also the number of rank names allocated

    // Initialize rank names and parent taxids arrays
    *rank_names_p = NULL;
    *parent_taxids_p = malloc(buffer_size * sizeof(int));
    if (*parent_taxids_p == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for the parent taxids array");
        goto error;
    }

    *rank_names_p = malloc(buffer_size * sizeof(char*));
    if (*rank_names_p == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for the rank names array");
        goto error;
    }

    // Open the taxdump directory
    tax_dir = opendir(taxdump);
    if (tax_dir == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nProblem opening a taxdump directory");
        goto error;
    }

    // Look for the 'nodes.dmp' file
    while ((dp = readdir(tax_dir)) != NULL)
    {
        if (strcmp(dp->d_name, "nodes.dmp") == 0)
        {
            file_found = true;
            break;
        }
    }

    // The directory stream is only needed for the lookup.
    // closedir() must not be called again on the same stream if it fails.
    if (closedir(tax_dir) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError closing a taxdump directory");
        goto error;
    }

    if (!file_found)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: could not find 'nodes.dmp' file in taxdump directory");
        goto error;
    }

    // Initializing the taxa structure
    taxa = malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * buffer_size);
    if (taxa == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxonomy structure");
        goto error;
    }

    // Allocating the memory for the file name (sizeof counts the terminal '\0')
    file_name = malloc(strlen(taxdump) + sizeof("/nodes.dmp"));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a file name");
        goto error;
    }

    // Build the file path
    if (sprintf(file_name, "%s/nodes.dmp", taxdump) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a taxonomy file name for 'nodes.dmp'");
        free(file_name);
        goto error;
    }

    file = fopen(file_name, "r");
    free(file_name);
    if (file == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nProblem opening a taxonomy file");
        goto error;
    }

    taxa->max_taxid = 0;

    while (fgets(line, sizeof(line), file))
    {
        // Enlarge structures if needed.
        // The result of realloc goes through a temporary pointer so that the
        // original block can still be freed if the reallocation fails.
        if (n == buffer_size)
        {
            new_size = buffer_size * 2;

            enlarged = realloc(taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * new_size);
            if (enlarged == NULL)
            {
                obi_set_errno(OBI_MALLOC_ERROR);
                obidebug(1, "\nError reallocating memory for a taxonomy structure");
                goto error;
            }
            taxa = enlarged;

            enlarged = realloc(*parent_taxids_p, sizeof(int) * new_size);
            if (enlarged == NULL)
            {
                obi_set_errno(OBI_MALLOC_ERROR);
                obidebug(1, "\nError reallocating memory for the parent taxids array");
                goto error;
            }
            *parent_taxids_p = enlarged;

            enlarged = realloc(*rank_names_p, sizeof(char*) * new_size);
            if (enlarged == NULL)
            {
                obi_set_errno(OBI_MALLOC_ERROR);
                obidebug(1, "\nError reallocating memory for the rank names array");
                goto error;
            }
            *rank_names_p = enlarged;

            buffer_size = new_size;
        }

        // Check for terminal '\n' character (line complete)
        len = strlen(line);
        if ((len == 0) || (line[len-1] != '\n'))
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
            goto error;
        }

        taxa->taxon[n].idx = n;

        // Parse 3 first elements separated by '|'
        elt = strtok(line, "|");
        if (elt == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: malformed line in 'nodes.dmp' file");
            goto error;
        }
        // Remove the last character (tab character); strtok never returns an empty token
        elt[strlen(elt)-1] = '\0';

        // First element: taxid
        taxa->taxon[n].taxid = atoi(elt);

        // Update max taxid
        if (taxa->taxon[n].taxid > taxa->max_taxid)
            taxa->max_taxid = taxa->taxon[n].taxid;

        // Initialize farest taxid value
        taxa->taxon[n].farest = -1;

        for (i = 1; i < 3; i++)
        {
            elt = strtok(NULL, "|");
            if (elt == NULL)
            {   // Fewer fields than expected
                obi_set_errno(OBI_TAXONOMY_ERROR);
                obidebug(1, "\nError: malformed line in 'nodes.dmp' file");
                goto error;
            }

            // Remove the first and the last characters (tab characters).
            // A field too short to hold both is read as an empty string.
            len = strlen(elt);
            if (len < 2)
                elt[0] = '\0';
            else
            {
                elt[len-1] = '\0';
                elt++;
            }

            if (i == 1)         // Second element: parent taxid
                (*parent_taxids_p)[n] = atoi(elt);
            else if (i == 2)    // Third element: rank name
            {
                (*rank_names_p)[n] = malloc(strlen(elt) + 1);
                if ((*rank_names_p)[n] == NULL)
                {
                    obi_set_errno(OBI_MALLOC_ERROR);
                    obidebug(1, "\nError allocating memory for taxon rank name");
                    goto error;
                }
                strcpy((*rank_names_p)[n], elt);
            }
        }

        // The rank name is the last thing stored for a line, so the rank names
        // allocated so far are always exactly those at indices [0, n)
        n++;
    }

    // Check that fgets stopped because it reached EOF
    if (!feof(file))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: file reading was stopped before end of file");
        goto error;
    }

    // Store count
    taxa->count = n;
    taxa->ncbi_count = n;
    taxa->local_count = 0;

    // Truncate the structure memory to the right size
    enlarged = realloc(taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * n);
    if (enlarged == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError reallocating memory for taxonomy structure");
        goto error;
    }
    taxa = enlarged;

    if (fclose(file) < 0)
    {
        file = NULL;    // The stream must not be closed a second time
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError closing a taxdump file");
        goto error;
    }

    tax->taxa = taxa;

    return 0;

error:
    if (file != NULL)
        fclose(file);
    free(taxa);
    if (*rank_names_p != NULL)
    {
        for (i = 0; i < n; i++)
            free((*rank_names_p)[i]);
    }
    free(*rank_names_p);
    free(*parent_taxids_p);
    *rank_names_p = NULL;
    *parent_taxids_p = NULL;
    return -1;
}
2016-10-27 18:56:11 +02:00
2017-01-11 16:36:08 +01:00
/**
 * @brief Reads the 'delnodes.dmp' file of a taxdump directory.
 *
 * The first field of each line (a deleted taxid) is appended, in file order,
 * to a newly allocated array.
 *
 * On success, *delnodes_p points to the array (not truncated) and *delnodes_count is the
 * number of taxids stored in it; the array is handed over to the caller.
 * On failure, the array is freed, *delnodes_p is set to NULL and *delnodes_count is not modified.
 *
 * @param taxdump The path to the taxdump directory.
 * @param tax A pointer on the taxonomy structure being built (not used by this function).
 * @param delnodes_p A pointer on the array of deleted taxids, allocated and filled by the function.
 * @param delnodes_count A pointer on the integer receiving the number of deleted taxids.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int read_delnodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t** delnodes_p, int32_t* delnodes_count)
{
    struct dirent* dp;
    DIR*           tax_dir;
    FILE*          file = NULL;
    char*          file_name;
    bool           file_found = false;
    char           line[2048];  // TODO large enough?
    char*          elt;
    size_t         len;
    void*          enlarged;
    int            buffer_size = 10000;
    int            n = 0;
    int            old_taxid;

    (void) tax;     // Only kept for signature consistency with the other taxdump readers

    // Initializing the list of deleted nodes
    *delnodes_p = malloc(sizeof(int32_t) * buffer_size);
    if (*delnodes_p == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for the deleted taxids array");
        return -1;
    }

    tax_dir = opendir(taxdump);
    if (tax_dir == NULL)
    {   // No directory stream was opened, so there is nothing to close here
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nProblem opening a taxdump directory");
        goto error;
    }

    // Go through taxonomy files
    while ((dp = readdir(tax_dir)) != NULL)
    {
        if (strcmp(dp->d_name, "delnodes.dmp") == 0)
        {
            file_found = true;
            break;
        }
    }

    // The directory stream is only needed for the lookup
    if (closedir(tax_dir) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError closing a taxdump directory");
        goto error;
    }

    if (!file_found)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: could not find 'delnodes.dmp' file in taxdump directory");
        goto error;
    }

    // Allocating the memory for the file name (sizeof counts the terminal '\0')
    file_name = malloc(strlen(taxdump) + sizeof("/delnodes.dmp"));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a file name");
        goto error;
    }

    // Build the file path
    if (sprintf(file_name, "%s/delnodes.dmp", taxdump) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a taxonomy file name");
        free(file_name);
        goto error;
    }

    file = fopen(file_name, "r");
    free(file_name);
    if (file == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nProblem opening a taxonomy file");
        goto error;
    }

    while (fgets(line, sizeof(line), file))
    {
        // Check for terminal '\n' character (line complete)
        len = strlen(line);
        if ((len == 0) || (line[len-1] != '\n'))
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
            goto error;
        }

        // Get first and only element of the line (the deprecated taxid)
        elt = strtok(line, "|");
        if (elt == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: malformed line in 'delnodes.dmp' file");
            goto error;
        }
        // Remove the last character (tab character); strtok never returns an empty token
        elt[strlen(elt)-1] = '\0';

        // First element: old deprecated taxid
        old_taxid = atoi(elt);

        // Store the old taxid in the list of deleted taxids

        // Enlarge array if needed (through a temporary pointer, so that the
        // original array can still be freed if the reallocation fails)
        if (n == buffer_size)
        {
            enlarged = realloc(*delnodes_p, sizeof(int32_t) * buffer_size * 2);
            if (enlarged == NULL)
            {
                obi_set_errno(OBI_MALLOC_ERROR);
                obidebug(1, "\nError reallocating memory for a taxonomy structure");
                goto error;
            }
            *delnodes_p = enlarged;
            buffer_size = buffer_size * 2;
        }

        (*delnodes_p)[n] = old_taxid;
        n++;
    }

    // Check that fgets stopped because it reached EOF
    if (!feof(file))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: file reading was stopped before end of file");
        goto error;
    }

    // Store count
    *delnodes_count = n;

    if (fclose(file) < 0)
    {
        file = NULL;    // The stream must not be closed a second time
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError closing a taxdump file");
        goto error;
    }

    return 0;

error:
    if (file != NULL)
        fclose(file);
    free(*delnodes_p);
    *delnodes_p = NULL;
    return -1;
}
/**
 * @brief Reads the 'merged.dmp' file of a taxdump directory and builds the merged index.
 *
 * The merged list is an ordered list of the current taxids, the deprecated taxids that have
 * current references, and the deleted taxids with no current reference. An element of the list
 * is composed of the taxid, and the index of the taxon in the taxa structure, or -1 for deleted
 * taxids. Creating the merged list requires to merge the 3 ordered lists into one:
 * tax->taxa is read in ascending index order, the deleted taxids are read from the end of the
 * array to its beginning (they are expected in descending order), and 'merged.dmp' is read
 * in file order.
 *
 * NOTE(review): current and deleted taxids that are greater than the last deprecated taxid of
 * 'merged.dmp' are not appended after the last line has been read -- confirm that this is intended.
 *
 * tax->taxa must have been filled before calling this function.
 *
 * On success, tax->merged_idx points to the new merged index, truncated to its number of
 * elements, and the delnodes array is freed.
 * On failure, the delnodes array is NOT freed (it stays owned by the caller), and if the merged
 * index had been allocated by this call, it is freed and tax->merged_idx is set to NULL.
 *
 * @param taxdump The path to the taxdump directory.
 * @param tax A pointer on the taxonomy structure being built.
 * @param delnodes The array of deleted taxids.
 * @param delnodes_count The number of taxids in the delnodes array.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnodes, int32_t delnodes_count)
{
    int            n, nD, nT;
    int            taxid, old_taxid;
    ecotx_t*       t;
    struct dirent* dp;
    DIR*           tax_dir;
    FILE*          file = NULL;
    char*          file_name;
    bool           file_found = false;
    bool           from_taxa;
    bool           from_deleted;
    char           line[2048];  // TODO large enough?
    char*          elt;
    size_t         len;
    void*          enlarged;
    int            buffer_size = 10000;

    tax_dir = opendir(taxdump);
    if (tax_dir == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nProblem opening a taxdump directory");
        return -1;
    }

    // Go through taxonomy files
    while ((dp = readdir(tax_dir)) != NULL)
    {
        if (strcmp(dp->d_name, "merged.dmp") == 0)
        {
            file_found = true;
            break;
        }
    }

    // The directory stream is only needed for the lookup.
    // closedir() must not be called again on the same stream if it fails.
    if (closedir(tax_dir) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError closing a taxdump directory");
        return -1;
    }

    if (!file_found)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: could not find 'merged.dmp' file in taxdump directory");
        return -1;
    }

    // Initializing the merged structure
    tax->merged_idx = malloc(sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
    if (tax->merged_idx == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxonomy structure");
        return -1;
    }

    // Allocating the memory for the file name (sizeof counts the terminal '\0')
    file_name = malloc(strlen(taxdump) + sizeof("/merged.dmp"));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a file name");
        goto error;
    }

    // Build the file path
    if (sprintf(file_name, "%s/merged.dmp", taxdump) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a taxonomy file name");
        free(file_name);
        goto error;
    }

    file = fopen(file_name, "r");
    free(file_name);
    if (file == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nProblem opening a taxonomy file");
        goto error;
    }

    nT = 0;                     // to point in current taxa list while merging
    nD = delnodes_count - 1;    // to point in deleted taxids list while merging (going from count-1 to 0 because taxids are sorted in descending order)
    n = 0;                      // to point in final merged list while merging

    while (fgets(line, sizeof(line), file))
    {
        // Check for terminal '\n' character (line complete)
        len = strlen(line);
        if ((len == 0) || (line[len-1] != '\n'))
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
            goto error;
        }

        // Parse the 2 elements separated by '|'

        // Get first element
        elt = strtok(line, "|");
        if (elt == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: malformed line in 'merged.dmp' file");
            goto error;
        }
        // Remove the last character (tab character); strtok never returns an empty token
        elt[strlen(elt)-1] = '\0';
        // First element: old deprecated taxid
        old_taxid = atoi(elt);

        // Get 2nd element: new taxid
        elt = strtok(NULL, "|");
        if (elt == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: malformed line in 'merged.dmp' file");
            goto error;
        }
        // Remove the first and the last characters (tab characters).
        // A field too short to hold both is read as an empty string.
        len = strlen(elt);
        if (len < 2)
            elt[0] = '\0';
        else
        {
            elt[len-1] = '\0';
            elt++;
        }
        taxid = atoi(elt);

        // Store the old taxid in the merged_idx ordered taxid list:
        // first add all the current and deleted taxids that are smaller than the old taxid
        while (true)
        {
            // Each list is only read after checking that its cursor is in range
            from_taxa    = (nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid);
            from_deleted = (nD >= 0) && (delnodes[nD] < old_taxid);

            if (!from_taxa && !from_deleted)
                break;

            // When both lists have a candidate, the smaller taxid goes first.
            // On a tie the current taxon is taken first, so the loop always progresses.
            if (from_taxa && from_deleted && (delnodes[nD] < (tax->taxa)->taxon[nT].taxid))
                from_taxa = false;

            // Enlarge structure if needed (through a temporary pointer, so that the
            // original block can still be freed if the reallocation fails)
            if (n == buffer_size)
            {
                enlarged = realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size * 2);
                if (enlarged == NULL)
                {
                    obi_set_errno(OBI_MALLOC_ERROR);
                    obidebug(1, "\nError reallocating memory for a taxonomy structure");
                    goto error;
                }
                tax->merged_idx = enlarged;
                buffer_size = buffer_size * 2;
            }

            if (from_taxa)
            {   // Add element from taxa list
                (tax->merged_idx)->merged[n].taxid = (tax->taxa)->taxon[nT].taxid;
                (tax->merged_idx)->merged[n].idx = nT;
                nT++;
            }
            else
            {   // Add element from deleted taxids list
                (tax->merged_idx)->merged[n].taxid = delnodes[nD];
                (tax->merged_idx)->merged[n].idx = -1;  // The index to tag deleted taxids is -1
                nD--;
            }
            n++;
        }

        // Add the deprecated taxid

        // Enlarge structure if needed
        if (n == buffer_size)
        {
            enlarged = realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size * 2);
            if (enlarged == NULL)
            {
                obi_set_errno(OBI_MALLOC_ERROR);
                obidebug(1, "\nError reallocating memory for a taxonomy structure");
                goto error;
            }
            tax->merged_idx = enlarged;
            buffer_size = buffer_size * 2;
        }

        // Store the deprecated taxid with the index that refers to the new taxid

        // Find the index of the new taxid
        t = get_taxon_with_current_taxid(tax, taxid);
        if (t == NULL)
        {   // Guard against a missing taxon before reading its index
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: could not find the current taxon associated with a merged taxid when reading taxdump");
            goto error;
        }

        // Store the old taxid with the index
        (tax->merged_idx)->merged[n].taxid = old_taxid;
        (tax->merged_idx)->merged[n].idx = t->idx;
        n++;
    }

    // Check that fgets stopped because it reached EOF
    if (!feof(file))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: file reading was stopped before end of file");
        goto error;
    }

    // Store count
    (tax->merged_idx)->count = n;

    // Truncate the structure memory to the right size
    enlarged = realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * n);
    if (enlarged == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError reallocating memory for a a taxonomy structure");
        goto error;
    }
    tax->merged_idx = enlarged;

    if (fclose(file) < 0)
    {
        file = NULL;    // The stream must not be closed a second time
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError closing a taxdump file");
        goto error;
    }

    // Free delnodes array, not needed anymore
    free(delnodes);

    return 0;

error:
    if (file != NULL)
        fclose(file);
    // The count of the merged index is only set on success, so a partial index is never handed back
    free(tax->merged_idx);
    tax->merged_idx = NULL;
    return -1;
}
2017-01-05 14:28:36 +01:00
2017-01-11 16:36:08 +01:00
/**
 * @brief Reads the 'names.dmp' file of a taxdump directory.
 *
 * For each line of the file, a name element is created with:
 *   - a pointer on the taxon of tax->taxa that has the taxid read in the first field,
 *   - a newly allocated copy of the name (second field),
 *   - a newly allocated copy of the class name (fourth field),
 *   - a flag telling whether the class name is "scientific name".
 *
 * The taxon is looked for in tax->taxa starting from the taxon found for the previous line,
 * because there are several names by taxon but they are in the same order.
 *
 * tax->taxa must have been filled before calling this function.
 *
 * On success, tax->names points to the new names structure, truncated to the number of names read.
 * On failure, everything allocated by the function is freed and tax->names is left untouched.
 * The taxonomy itself is never closed here: the caller already closes it when this function fails.
 *
 * @param taxdump The path to the taxdump directory.
 * @param tax A pointer on the taxonomy structure being built.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int read_names_dmp(const char* taxdump, OBIDMS_taxonomy_p tax)
{
    int            i, j, n;
    int            rows_started = 0;    // Number of name elements whose pointers are initialized
    int            taxid;
    struct dirent* dp;
    DIR*           tax_dir;
    FILE*          file = NULL;
    char*          file_name;
    bool           file_found = false;
    char           line[2048];  // TODO large enough?
    char*          elt;
    size_t         len;
    econameidx_t*  names_idx = NULL;
    void*          enlarged;
    int            buffer_size = 10000;

    tax_dir = opendir(taxdump);
    if (tax_dir == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nProblem opening a taxdump directory");
        return -1;
    }

    // Go through taxonomy files
    while ((dp = readdir(tax_dir)) != NULL)
    {
        if (strcmp(dp->d_name, "names.dmp") == 0)
        {
            file_found = true;
            break;
        }
    }

    // The directory stream is only needed for the lookup.
    // closedir() must not be called again on the same stream if it fails.
    if (closedir(tax_dir) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError closing a taxdump directory");
        return -1;
    }

    if (!file_found)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: could not find 'names.dmp' file in taxdump directory");
        return -1;
    }

    // Initializing the names structure
    names_idx = malloc(sizeof(econameidx_t) + sizeof(econame_t) * buffer_size);
    if (names_idx == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxonomy structure");
        return -1;
    }

    // Allocating the memory for the file name (sizeof counts the terminal '\0')
    file_name = malloc(strlen(taxdump) + sizeof("/names.dmp"));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a file name");
        goto error;
    }

    // Build the file path
    if (sprintf(file_name, "%s/names.dmp", taxdump) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a taxonomy file name");
        free(file_name);
        goto error;
    }

    file = fopen(file_name, "r");
    free(file_name);
    if (file == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nProblem opening a taxonomy file");
        goto error;
    }

    n = 0;
    j = 0;
    while (fgets(line, sizeof(line), file))
    {
        // Enlarge structures if needed (through a temporary pointer, so that the
        // original block can still be freed if the reallocation fails)
        if (n == buffer_size)
        {
            enlarged = realloc(names_idx, sizeof(econameidx_t) + sizeof(econame_t) * buffer_size * 2);
            if (enlarged == NULL)
            {
                obi_set_errno(OBI_MALLOC_ERROR);
                obidebug(1, "\nError reallocating memory for a taxonomy structure");
                goto error;
            }
            names_idx = enlarged;
            buffer_size = buffer_size * 2;
        }

        // Check for terminal '\n' character (line complete)
        len = strlen(line);
        if ((len == 0) || (line[len-1] != '\n'))
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
            goto error;
        }

        // Parse 4 first elements separated by '|'
        elt = strtok(line, "|");
        if (elt == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: malformed line in 'names.dmp' file");
            goto error;
        }
        // Remove the last character (tab character); strtok never returns an empty token
        elt[strlen(elt)-1] = '\0';

        // First element: taxid
        taxid = atoi(elt);

        // Find taxid in taxa structure and store pointer in names structure
        i = j;
        while ((i < (tax->taxa)->count) && ((tax->taxa)->taxon[i].taxid != taxid))
            i++;
        if (i == (tax->taxa)->count)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError: could not find taxon associated to name when reading taxdump");
            goto error;
        }

        j = i;  // Because there are several names by taxon but they are in the same order

        names_idx->names[n].taxon = ((tax->taxa)->taxon) + i;

        // Initialize the string pointers so that the cleanup can free a partially filled element
        names_idx->names[n].name = NULL;
        names_idx->names[n].class_name = NULL;
        rows_started = n + 1;

        for (i = 1; i < 4; i++)
        {
            elt = strtok(NULL, "|");
            if (elt == NULL)
            {   // Fewer fields than expected
                obi_set_errno(OBI_TAXONOMY_ERROR);
                obidebug(1, "\nError: malformed line in 'names.dmp' file");
                goto error;
            }

            // Remove the first and the last characters (tab characters).
            // A field too short to hold both is read as an empty string.
            len = strlen(elt);
            if (len < 2)
                elt[0] = '\0';
            else
            {
                elt[len-1] = '\0';
                elt++;
            }

            if (i == 1)         // Name
            {
                names_idx->names[n].name = malloc(strlen(elt) + 1);
                if (names_idx->names[n].name == NULL)
                {
                    obi_set_errno(OBI_MALLOC_ERROR);
                    obidebug(1, "\nError allocating memory for a taxon name");
                    goto error;
                }
                strcpy(names_idx->names[n].name, elt);
            }
            else if (i == 3)    // Class name
            {
                names_idx->names[n].class_name = malloc(strlen(elt) + 1);
                if (names_idx->names[n].class_name == NULL)
                {
                    obi_set_errno(OBI_MALLOC_ERROR);
                    obidebug(1, "\nError allocating memory for a taxon class name");
                    goto error;
                }
                strcpy(names_idx->names[n].class_name, elt);

                if (strcmp(elt, "scientific name") == 0)
                    names_idx->names[n].is_scientific_name = 1;
                else
                    names_idx->names[n].is_scientific_name = 0;
            }
        }
        n++;
    }

    // Check that fgets stopped because it reached EOF
    if (!feof(file))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError: file reading was stopped before end of file");
        goto error;
    }

    // Store count
    names_idx->count = n;

    // Truncate the structure memory to the right size
    enlarged = realloc(names_idx, sizeof(econameidx_t) + sizeof(econame_t) * n);
    if (enlarged == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError reallocating memory for a a taxonomy structure");
        goto error;
    }
    names_idx = enlarged;

    if (fclose(file) < 0)
    {
        file = NULL;    // The stream must not be closed a second time
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError closing a taxdump file");
        goto error;
    }

    tax->names = names_idx;

    return 0;

error:
    if (file != NULL)
        fclose(file);
    for (i = 0; i < rows_started; i++)
    {
        free(names_idx->names[i].name);
        free(names_idx->names[i].class_name);
    }
    free(names_idx);
    return -1;
}
2017-01-18 18:22:49 +01:00
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2017-01-11 16:36:08 +01:00
OBIDMS_taxonomy_p obi_read_taxdump ( const char * taxdump )
{
OBIDMS_taxonomy_p tax ;
char * * rank_names = NULL ;
int * parent_taxids = NULL ;
int32_t * delnodes = NULL ;
int32_t delnodes_count ;
bool already_in ;
ecotx_t * t ;
int buffer_size ;
int i , j ;
// Initialize taxonomy structure
tax = ( OBIDMS_taxonomy_p ) malloc ( sizeof ( OBIDMS_taxonomy_t ) ) ;
if ( tax = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating the memory for a taxonomy structure " ) ;
return NULL ;
}
2017-01-16 17:28:20 +01:00
tax - > ranks = NULL ;
tax - > taxa = NULL ;
tax - > names = NULL ;
tax - > preferred_names = NULL ;
tax - > merged_idx = NULL ;
2017-01-11 16:36:08 +01:00
tax - > dms = NULL ;
( tax - > tax_name ) [ 0 ] = ' \0 ' ;
// TODO check if taxdump path is for a gz file to unzip or a directory
// READ NODES.DMP
if ( read_nodes_dmp ( taxdump , tax , & rank_names , & parent_taxids ) < 0 )
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Problem reading 'nodes.dmp' " ) ;
obi_close_taxonomy ( tax ) ;
return NULL ;
}
// READ DELNODES.DMP
if ( read_delnodes_dmp ( taxdump , tax , & delnodes , & delnodes_count ) < 0 )
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Problem reading 'delnodes.dmp' " ) ;
2016-10-27 18:56:11 +02:00
obi_close_taxonomy ( tax ) ;
free ( rank_names ) ;
2017-01-11 16:36:08 +01:00
free ( parent_taxids ) ;
2016-10-27 18:56:11 +02:00
return NULL ;
}
2017-01-11 16:36:08 +01:00
// READ MERGED.DMP
if ( read_merged_dmp ( taxdump , tax , delnodes , delnodes_count ) < 0 )
2016-10-27 18:56:11 +02:00
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
2017-01-11 16:36:08 +01:00
obidebug ( 1 , " \n Problem reading 'merged.dmp' " ) ;
2016-10-27 18:56:11 +02:00
obi_close_taxonomy ( tax ) ;
2017-01-11 16:36:08 +01:00
free ( delnodes ) ;
free ( rank_names ) ;
2016-10-27 18:56:11 +02:00
free ( parent_taxids ) ;
2017-01-11 16:36:08 +01:00
return NULL ;
}
// READ NAMES.DMP
if ( read_names_dmp ( taxdump , tax ) < 0 )
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Problem reading 'names.dmp' " ) ;
obi_close_taxonomy ( tax ) ;
2016-10-27 18:56:11 +02:00
free ( rank_names ) ;
2017-01-11 16:36:08 +01:00
free ( parent_taxids ) ;
2016-10-27 18:56:11 +02:00
return NULL ;
}
// Go through data to fill the taxonomy structure
// Build rank list
// Initialize rank structure
buffer_size = 10 ;
tax - > ranks = ( ecorankidx_t * ) malloc ( sizeof ( ecorankidx_t ) + sizeof ( char * ) * buffer_size ) ;
if ( tax - > ranks = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating memory for taxon rank array " ) ;
obi_close_taxonomy ( tax ) ;
free ( parent_taxids ) ;
free ( rank_names ) ;
return NULL ;
}
( tax - > ranks ) - > count = 0 ;
for ( i = 0 ; i < ( tax - > taxa ) - > count ; i + + )
{
already_in = false ;
for ( j = 0 ; j < ( tax - > ranks ) - > count ; j + + )
{
if ( strcmp ( rank_names [ i ] , ( ( tax - > ranks ) - > label ) [ j ] ) = = 0 )
{
already_in = true ;
break ;
}
}
if ( ! already_in )
{
// Realloc rank structure if needed
if ( ( tax - > ranks ) - > count = = buffer_size )
{
buffer_size = buffer_size + 10 ;
tax - > ranks = ( ecorankidx_t * ) realloc ( tax - > ranks , sizeof ( ecorankidx_t ) + sizeof ( char * ) * buffer_size ) ;
if ( tax - > ranks = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error reallocating memory for taxon ranks " ) ;
obi_close_taxonomy ( tax ) ;
free ( parent_taxids ) ;
free ( rank_names ) ;
return NULL ;
}
}
// Store new rank
( ( tax - > ranks ) - > label ) [ ( tax - > ranks ) - > count ] = ( char * ) malloc ( ( strlen ( rank_names [ i ] ) + 1 ) * sizeof ( char ) ) ;
if ( ( ( tax - > ranks ) - > label ) [ ( tax - > ranks ) - > count ] = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating memory for taxon rank names " ) ;
obi_close_taxonomy ( tax ) ;
free ( parent_taxids ) ;
free ( rank_names ) ;
return NULL ;
}
strcpy ( ( ( tax - > ranks ) - > label ) [ ( tax - > ranks ) - > count ] , rank_names [ i ] ) ;
( ( tax - > ranks ) - > count ) + + ;
}
}
// Truncate to the number of ranks recorded
tax - > ranks = ( ecorankidx_t * ) realloc ( tax - > ranks , sizeof ( ecorankidx_t ) + sizeof ( char * ) * ( tax - > ranks ) - > count ) ;
if ( tax - > ranks = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error reallocating memory for taxon ranks " ) ;
obi_close_taxonomy ( tax ) ;
free ( parent_taxids ) ;
free ( rank_names ) ;
return NULL ;
}
// Sort in alphabetical order
qsort ( ( tax - > ranks ) - > label , ( tax - > ranks ) - > count , sizeof ( char * ) , cmp_str ) ;
// Associate the taxa with their rank indices
for ( i = 0 ; i < ( tax - > taxa ) - > count ; i + + )
{
for ( j = 0 ; j < ( tax - > ranks ) - > count ; j + + )
{
if ( strcmp ( rank_names [ i ] , ( ( tax - > ranks ) - > label ) [ j ] ) = = 0 )
{
( ( tax - > taxa ) - > taxon ) [ i ] . rank = j ;
break ;
}
}
}
// Associate the taxa with their scientific name
for ( i = 0 ; i < ( tax - > names ) - > count ; i + + )
{
if ( ( tax - > names ) - > names [ i ] . is_scientific_name )
{
( ( tax - > names ) - > names [ i ] . taxon ) - > name = ( char * ) malloc ( ( strlen ( ( ( ( tax - > names ) - > names ) [ i ] ) . name ) + 1 ) * sizeof ( char ) ) ;
if ( ( ( tax - > names ) - > names [ i ] . taxon ) - > name = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error reallocating memory for taxon ranks " ) ;
obi_close_taxonomy ( tax ) ;
free ( parent_taxids ) ;
free ( rank_names ) ;
return NULL ;
}
strcpy ( ( ( tax - > names ) - > names [ i ] . taxon ) - > name , ( ( ( tax - > names ) - > names ) [ i ] ) . name ) ;
}
}
// Sort names in alphabetical order
qsort ( ( tax - > names ) - > names , ( tax - > names ) - > count , sizeof ( econame_t ) , cmp_names ) ;
// Associate the taxa with their parent
for ( i = 0 ; i < ( tax - > taxa ) - > count ; i + + )
{
2017-01-18 18:22:49 +01:00
( ( tax - > taxa ) - > taxon ) [ i ] . parent = get_taxon_with_current_taxid ( tax , parent_taxids [ i ] ) ;
2016-10-27 18:56:11 +02:00
if ( ( ( tax - > taxa ) - > taxon ) [ i ] . parent = = NULL )
{
2016-11-03 17:59:21 +01:00
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
2016-10-27 18:56:11 +02:00
obidebug ( 1 , " \n Error: taxon parent not found " ) ;
obi_close_taxonomy ( tax ) ;
free ( parent_taxids ) ;
free ( rank_names ) ;
return NULL ;
}
( ( ( tax - > taxa ) - > taxon ) [ i ] . parent ) - > farest = 0 ;
}
2017-01-16 17:28:20 +01:00
// Initialize preferred names to NULL
for ( i = 0 ; i < ( tax - > taxa ) - > count ; i + + )
( ( tax - > taxa ) - > taxon ) [ i ] . preferred_name = NULL ;
2016-10-27 18:56:11 +02:00
( tax - > taxa ) - > buffer_size = ( tax - > taxa ) - > count ;
2017-01-11 16:36:08 +01:00
// Compute longest branches (used to compute distances between taxa faster)
2016-10-27 18:56:11 +02:00
for ( i = 0 ; i < ( tax - > taxa ) - > count ; i + + )
{
t = ( ( ( tax - > taxa ) ) - > taxon ) + i ;
if ( t - > farest = = - 1 )
{
t - > farest = 0 ;
while ( t - > parent ! = t )
{
j = t - > farest + 1 ;
if ( j > t - > parent - > farest )
{
t - > parent - > farest = j ;
t = t - > parent ;
}
else
t = ( tax - > taxa ) - > taxon ;
}
}
}
// Freeing
free ( parent_taxids ) ;
for ( i = 0 ; i < ( tax - > taxa ) - > count ; i + + )
free ( rank_names [ i ] ) ;
free ( rank_names ) ;
return tax ;
}
2016-11-03 17:59:21 +01:00
/**
 * @brief Reads a taxonomy stored in a DMS and builds the corresponding taxonomy structure.
 *
 * The taxonomy directory is obtained with get_taxonomy_path(). The files read from it,
 * all named after the taxonomy, are, in this order: the ranks ('.rdx'), the taxa ('.tdx')
 * together with the local taxa ('.ldx'), the merged index ('.adx'), the preferred names
 * ('.pdx') and, only if requested, the alternative names ('.ndx').
 *
 * @param dms A pointer on the DMS the taxonomy belongs to; it is stored in the returned structure.
 * @param taxonomy_name The name of the taxonomy; it is copied in the returned structure.
 * @param read_alternative_names Whether the alternative names file should be read.
 *
 * @returns A pointer on the newly allocated taxonomy structure, to be released with obi_close_taxonomy().
 * @retval NULL if an error occurred.
 */
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names)
{
    OBIDMS_taxonomy_p tax;
    char*             taxonomy_path         = NULL;
    char*             ranks_file_name       = NULL;
    char*             taxa_file_name        = NULL;
    char*             merged_idx_file_name  = NULL;
    char*             local_taxa_file_name  = NULL;
    char*             alter_names_file_name = NULL;
    char*             pref_names_file_name  = NULL;
    int               buffer_size;
    int               written;
    int               i;

    tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
    if (tax == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a taxonomy structure");
        return NULL;
    }

    tax->ranks           = NULL;
    tax->taxa            = NULL;
    tax->names           = NULL;
    tax->preferred_names = NULL;
    tax->merged_idx      = NULL;

    tax->dms = dms;

    // The name is copied in a fixed-size field of the structure: refuse names that do not fit
    if (strlen(taxonomy_name) >= sizeof(tax->tax_name))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy: the taxonomy name is too long");
        free(tax);
        return NULL;
    }
    strcpy(tax->tax_name, taxonomy_name);

    buffer_size = 2048;

    taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
    if (taxonomy_path == NULL)
    {
        free(tax);
        return NULL;
    }

    // Read ranks
    ranks_file_name = (char*) malloc(buffer_size * sizeof(char));
    if (ranks_file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for ranks file name");
        goto error_before_ranks;
    }
    written = snprintf(ranks_file_name, buffer_size, "%s/%s.rdx", taxonomy_path, taxonomy_name);
    if ((written < 0) || (written >= buffer_size))    // encoding error or truncated file name
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building ranks file name");
        goto error_before_ranks;
    }
    tax->ranks = read_ranks_idx(ranks_file_name);
    if (tax->ranks == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading taxonomy ranks file (check taxonomy name spelling)");
        goto error_before_ranks;
    }
    free(ranks_file_name);
    ranks_file_name = NULL;

    // Read taxa
    taxa_file_name = (char*) malloc(buffer_size * sizeof(char));
    if (taxa_file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for taxa file name");
        goto error_after_ranks;
    }
    written = snprintf(taxa_file_name, buffer_size, "%s/%s.tdx", taxonomy_path, taxonomy_name);
    if ((written < 0) || (written >= buffer_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building taxa file name");
        goto error_after_ranks;
    }
    local_taxa_file_name = (char*) malloc(buffer_size * sizeof(char));
    if (local_taxa_file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for taxa file name");
        goto error_after_ranks;
    }
    written = snprintf(local_taxa_file_name, buffer_size, "%s/%s.ldx", taxonomy_path, taxonomy_name);
    if ((written < 0) || (written >= buffer_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building local taxa file name");
        goto error_after_ranks;
    }
    tax->taxa = read_taxonomy_idx(taxa_file_name, local_taxa_file_name);
    if (tax->taxa == NULL)
        goto error_after_ranks;
    free(taxa_file_name);
    taxa_file_name = NULL;
    free(local_taxa_file_name);
    local_taxa_file_name = NULL;

    // Read merged index (old and current taxids referring to indices in the taxa structure)
    merged_idx_file_name = (char*) malloc(buffer_size * sizeof(char));
    if (merged_idx_file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for merged index file name");
        goto error_after_ranks;
    }
    written = snprintf(merged_idx_file_name, buffer_size, "%s/%s.adx", taxonomy_path, taxonomy_name);
    if ((written < 0) || (written >= buffer_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building merged index file name");
        goto error_after_ranks;
    }
    tax->merged_idx = read_merged_idx(merged_idx_file_name, tax);
    if (tax->merged_idx == NULL)
        goto error_after_ranks;
    free(merged_idx_file_name);
    merged_idx_file_name = NULL;

    // Read preferred names
    pref_names_file_name = (char*) malloc(buffer_size * sizeof(char));
    if (pref_names_file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for preferred names file name");
        goto error_after_ranks;
    }
    written = snprintf(pref_names_file_name, buffer_size, "%s/%s.pdx", taxonomy_path, taxonomy_name);
    if ((written < 0) || (written >= buffer_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building preferred names file name");
        goto error_after_ranks;
    }
    tax->preferred_names = read_preferred_names_idx(pref_names_file_name, tax);
    // A NULL result is accepted here (presumably no preferred names file - TODO confirm):
    // failure is detected through obi_errno only.
    // NOTE(review): this also fails if obi_errno was already set before this function was called.
    if (obi_errno)
        goto error_after_ranks;
    free(pref_names_file_name);
    pref_names_file_name = NULL;

    if (tax->preferred_names != NULL)
        fprintf(stderr, "\nPreferred names read");

    // Read alternative names
    if (read_alternative_names)
    {
        alter_names_file_name = (char*) malloc(buffer_size * sizeof(char));
        if (alter_names_file_name == NULL)
        {
            obi_set_errno(OBI_MALLOC_ERROR);
            obidebug(1, "\nError allocating memory for alternative names file name");
            goto error_after_ranks;
        }
        written = snprintf(alter_names_file_name, buffer_size, "%s/%s.ndx", taxonomy_path, taxonomy_name);
        if ((written < 0) || (written >= buffer_size))
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError building alternative names file name");
            goto error_after_ranks;
        }
        tax->names = read_names_idx(alter_names_file_name, tax);
        if (tax->names == NULL)
            goto error_after_ranks;
        free(alter_names_file_name);
        alter_names_file_name = NULL;
    }

    free(taxonomy_path);

    return tax;

error_after_ranks:
    // File names already released were reset to NULL, so freeing them all is safe
    free(taxa_file_name);
    free(local_taxa_file_name);
    free(merged_idx_file_name);
    free(pref_names_file_name);
    free(alter_names_file_name);
    free(taxonomy_path);
    if (tax->taxa == NULL)
    {
        // Taxa not read yet: only the ranks have been allocated so far, release them directly
        for (i = 0; i < (tax->ranks)->count; i++)
            free((tax->ranks)->label[i]);
        free(tax->ranks);
        free(tax);
    }
    else
        obi_close_taxonomy(tax);
    return NULL;

error_before_ranks:
    // Nothing but the structure itself has been allocated in the taxonomy
    free(ranks_file_name);
    free(taxonomy_path);
    free(tax);
    return NULL;
}
2017-01-18 18:22:49 +01:00
/**
 * @brief Writes a taxonomy in a new taxonomy directory of a DMS.
 *
 * The directory returned by get_taxonomy_path() is created first; the call fails
 * if it can not be created, in particular if it already exists. The ranks, the taxa,
 * the names and the merged index are then written, followed by the local taxa if the
 * taxonomy contains some, and by the preferred names if the taxonomy contains some.
 *
 * @param dms A pointer on the DMS in which the taxonomy is written.
 * @param tax A pointer on the taxonomy structure to write.
 * @param tax_name The name under which the taxonomy is written.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name)
{
    char* taxonomy_path;
    bool  already_exists;

    // Checked before anything is created on disk
    if (tax == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError writing a taxonomy: taxonomy pointer is NULL");
        return -1;
    }

    // Build the taxonomy directory path
    taxonomy_path = get_taxonomy_path(dms, tax_name);
    if (taxonomy_path == NULL)
        return -1;

    // Try to create the directory
    if (mkdir(taxonomy_path, 00777) < 0)
    {
        // errno is read before any other call is made
        already_exists = (errno == EEXIST);
        obi_set_errno(OBI_TAXONOMY_ERROR);
        if (already_exists)
            obidebug(1, "\nA taxonomy already exists with this name.");
        obidebug(1, "\nProblem creating a new taxonomy directory");
        free(taxonomy_path);
        return -1;
    }

    free(taxonomy_path);

    if (write_ranks_idx(dms, tax, tax_name) < 0)
        return -1;
    if (write_taxonomy_idx(dms, tax, tax_name) < 0)
        return -1;
    if (write_names_idx(dms, tax, tax_name) < 0)
        return -1;
    if (write_merged_idx(dms, tax, tax_name) < 0)
        return -1;

    // Check if there are local taxa (if so last taxon is local)
    if ((tax->taxa)->local_count > 0)
    {
        if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
            return -1;
    }

    // Write preferred names if there are some
    if (tax->preferred_names != NULL)
    {
        if (write_preferred_names_idx(dms, tax, tax_name) < 0)
            return -1;
    }

    return 0;
}
/**
 * @brief Closes a taxonomy: saves its local informations and frees the structure.
 *
 * If the taxonomy contains local taxa, they are written with write_local_taxonomy_idx(),
 * and if it contains preferred names, they are written with write_preferred_names_idx().
 * The structure is then always freed, including when writing failed, so the pointer
 * must not be used after this call whatever the returned value is.
 * A partially built taxonomy (members still set to NULL) is accepted.
 *
 * @param taxonomy A pointer on the taxonomy structure to close, or NULL (in which case nothing is done).
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if the local informations could not be written.
 */
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
{
    int i;
    int ret_value = 0;

    if (taxonomy == NULL)
        return 0;

    // Update local informations (local taxa and preferred names) if there are any
    if ((taxonomy->taxa != NULL) && ((taxonomy->taxa)->local_count > 0))
    {
        if (taxonomy->dms == NULL)
        {
            // Nowhere to write the local taxa: report the error and go on with freeing
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError closing a taxonomy with local files but no DMS associated (probably read directly from taxdump)");    // TODO discuss
            ret_value = -1;
        }
        else if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
            ret_value = -1;
    }

    if (taxonomy->preferred_names != NULL)
    {
        // Write preferred names if there are some
        if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
            ret_value = -1;

        // Free preferred names
        for (i = 0; i < (taxonomy->preferred_names)->count; i++)
        {
            free(((taxonomy->preferred_names)->names[i]).name);
            free(((taxonomy->preferred_names)->names[i]).class_name);
        }
        free(taxonomy->preferred_names);
    }

    if (taxonomy->ranks != NULL)
    {
        for (i = 0; i < (taxonomy->ranks)->count; i++)
            free((taxonomy->ranks)->label[i]);
        free(taxonomy->ranks);
    }

    if (taxonomy->names != NULL)
    {
        for (i = 0; i < (taxonomy->names)->count; i++)
        {
            free(((taxonomy->names)->names[i]).name);
            free(((taxonomy->names)->names[i]).class_name);
        }
        free(taxonomy->names);
    }

    if (taxonomy->taxa != NULL)
    {
        // NOTE(review): the preferred_name strings of the taxa are not released here,
        // their ownership has to be confirmed before freeing them.
        for (i = 0; i < (taxonomy->taxa)->count; i++)
            free(((taxonomy->taxa)->taxon[i]).name);
        free(taxonomy->taxa);
    }

    free(taxonomy->merged_idx);

    free(taxonomy);

    return ret_value;
}
2017-01-18 18:22:49 +01:00
int obi_taxo_add_local_taxon ( OBIDMS_taxonomy_p tax , const char * name , const char * rank_name , int32_t parent_taxid , int32_t min_taxid )
{
int32_t taxid ;
ecotx_t * taxon ;
int i ;
// econame_t* name_struct;
// Enlarge the structure memory for a new taxon
tax - > taxa = ( ecotxidx_t * ) realloc ( tax - > taxa , sizeof ( ecotxidx_t ) + sizeof ( ecotx_t ) * ( ( ( tax - > taxa ) - > count ) + 1 ) ) ;
if ( tax - > taxa = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error reallocating memory for a taxonomy structure to add a new taxon " ) ;
return - 1 ;
}
// Compute new taxid that must be equal or greater than 1E7 and greater than the maximum taxid existing in the taxonomy
if ( min_taxid < MIN_LOCAL_TAXID )
min_taxid = MIN_LOCAL_TAXID ;
if ( min_taxid > ( tax - > taxa ) - > max_taxid )
taxid = min_taxid ;
else
taxid = ( ( tax - > taxa ) - > max_taxid ) + 1 ;
// Fill the ecotx_t node structure
taxon = ( ( tax - > taxa ) - > taxon ) + ( ( tax - > taxa ) - > count ) ;
taxon - > taxid = taxid ;
taxon - > idx = ( tax - > taxa ) - > count ;
taxon - > local = true ;
taxon - > name = ( char * ) malloc ( ( strlen ( name ) + 1 ) * sizeof ( char ) ) ;
if ( taxon - > name = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating memory for a taxon name to add a new taxon " ) ;
return - 1 ;
}
strcpy ( taxon - > name , name ) ;
taxon - > rank = - 1 ;
for ( i = 0 ; i < ( tax - > ranks ) - > count ; i + + )
{
if ( strcmp ( rank_name , ( ( tax - > ranks ) - > label ) [ i ] ) = = 0 )
{
taxon - > rank = i ;
break ;
}
}
if ( taxon - > rank = = - 1 )
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Error: taxon rank not found when adding a new taxon " ) ;
return - 1 ;
}
taxon - > parent = obi_taxo_get_taxon_with_taxid ( tax , parent_taxid ) ;
if ( taxon - > parent = = NULL )
{
obi_set_errno ( OBI_TAXONOMY_ERROR ) ;
obidebug ( 1 , " \n Error: taxon parent not found when adding a new taxon " ) ;
return - 1 ;
}
taxon - > farest = 0 ;
// Update taxonomy counts etc
( tax - > taxa ) - > max_taxid = taxid ;
( ( tax - > taxa ) - > count ) + + ;
( ( tax - > taxa ) - > local_count ) + + ;
( tax - > taxa ) - > buffer_size = ( tax - > taxa ) - > count ;
// // Add new name in names structure // Commented because the new name was not added in the .ndx file in the OBITools1
// // Allocate memory for new name
// tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1));
// if (tax->names == NULL)
// {
// obi_set_errno(OBI_MALLOC_ERROR);
// obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
// return -1;
// }
//
// // Add new name
// name_struct = (tax->names)->names + ((tax->names)->count);
// name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
// if (name_struct->name == NULL)
// {
// obi_set_errno(OBI_MALLOC_ERROR);
// obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
// return -1;
// }
// strcpy(name_struct->name, name);
// name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char));
// if (name_struct->class_name == NULL)
// {
// obi_set_errno(OBI_MALLOC_ERROR);
// obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon");
// return -1;
// }
// strcpy(name_struct->class_name, "scientific name");
// name_struct->is_scientific_name = true;
// name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1;
//
// // Sort names in alphabetical order
// qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names);
//
// // Update name count
// ((tax->names)->count)++;
return taxid ;
}
/**
 * @brief Adds a preferred name to the taxon having a given taxid.
 *
 * The taxon is looked up with obi_taxo_get_taxon_with_taxid(), then the work is
 * delegated to obi_taxo_add_preferred_name_with_taxon().
 *
 * @param tax A pointer on the taxonomy structure.
 * @param taxid The taxid of the taxon receiving the preferred name.
 * @param preferred_name The preferred name to add.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if the taxon was not found or if another error occurred.
 */
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
{
    ecotx_t* taxon;

    taxon = obi_taxo_get_taxon_with_taxid(tax, taxid);
    if (taxon == NULL)
    {
        // The lookup returns NULL for unknown taxids: do not hand a NULL taxon over
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError adding a preferred name: taxon not found");
        return -1;
    }

    return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name);
}
/**
 * @brief Adds a preferred name to a taxon.
 *
 * The preferred name is copied twice: once in the taxon itself (replacing and freeing
 * its previous preferred name if it had one), and once in a new entry appended to the
 * preferred names of the taxonomy, with the class name "preferred name". The preferred
 * names, new entry included, are then sorted with cmp_names().
 * All the memory needed is allocated before anything is modified, so the taxonomy is
 * left untouched when an error is returned.
 *
 * @param tax A pointer on the taxonomy structure.
 * @param taxon A pointer on the taxon receiving the preferred name.
 * @param preferred_name The preferred name to add.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
{
    econame_t*    name_struct;
    econameidx_t* grown_names;
    char*         taxon_name_copy;
    char*         entry_name_copy;
    char*         class_name_copy;
    size_t        name_size;
    int           name_count;

    if ((tax == NULL) || (taxon == NULL) || (preferred_name == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError adding a preferred name: NULL taxonomy, taxon or name");
        return -1;
    }

    name_size = (strlen(preferred_name) + 1) * sizeof(char);

    // Copy of the name stored in the taxon
    taxon_name_copy = (char*) malloc(name_size);
    if (taxon_name_copy == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a new preferred name for a taxon");
        return -1;
    }
    strcpy(taxon_name_copy, preferred_name);

    // Copy of the name stored in the preferred names structure
    entry_name_copy = (char*) malloc(name_size);
    if (entry_name_copy == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a new taxon preferred name");
        free(taxon_name_copy);
        return -1;
    }
    strcpy(entry_name_copy, preferred_name);

    class_name_copy = (char*) malloc((strlen("preferred name") + 1) * sizeof(char));
    if (class_name_copy == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name");
        free(taxon_name_copy);
        free(entry_name_copy);
        return -1;
    }
    strcpy(class_name_copy, "preferred name");

    // Allocate or reallocate memory for the new entry (realloc() on NULL allocates).
    // The result goes in a temporary so the current structure is not lost on failure.
    name_count = (tax->preferred_names == NULL) ? 0 : (tax->preferred_names)->count;
    grown_names = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * (name_count + 1));
    if (grown_names == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name");
        free(taxon_name_copy);
        free(entry_name_copy);
        free(class_name_copy);
        return -1;
    }
    grown_names->count   = name_count;    // initializes the count of a newly created structure
    tax->preferred_names = grown_names;

    // Replace the previous preferred name of the taxon if there is one
    // NOTE(review): the entry of a replaced preferred name stays in the preferred names
    // structure - confirm whether it should be removed.
    free(taxon->preferred_name);
    taxon->preferred_name = taxon_name_copy;

    // Add new preferred name
    name_struct = (tax->preferred_names)->names + name_count;
    name_struct->name               = entry_name_copy;
    name_struct->class_name         = class_name_copy;
    name_struct->is_scientific_name = false;
    name_struct->taxon              = taxon;

    // Update preferred name count before sorting, so that the new entry is sorted too
    ((tax->preferred_names)->count)++;

    // Sort preferred names in alphabetical order
    qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names);

    return 0;
}
2016-11-03 17:59:21 +01:00
/**
 * @brief Returns the first taxon having a given rank among a taxon and its ancestors.
 *
 * The search starts with the taxon itself and follows the parent pointers.
 * It stops at the root node, which is the node being its own parent.
 *
 * @param taxon A pointer on the taxon from which the search starts.
 * @param rankidx The index of the rank looked for.
 *
 * @returns A pointer on the first taxon found with that rank.
 * @retval NULL if no such taxon was found up to the root node, or if the taxon pointer is NULL.
 */
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
{
    ecotx_t* node;

    if (taxon == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError getting the parent of a taxon at a given rank: taxon pointer is NULL");
        return NULL;
    }

    // Climb towards the root until a node with the wanted rank is met
    for (node = taxon; node->rank != rankidx; node = node->parent)
    {
        if (node->parent == node)    // root node reached without finding the rank
            return NULL;
    }

    return node;
}
2017-01-06 15:52:21 +01:00
/**
 * @brief Returns the taxon associated with a taxid.
 *
 * The taxid is searched in the merged index of the taxonomy with bsearch() and
 * cmp_taxids_in_ecomerged_t(); the index stored in the entry found gives the
 * position of the taxon in the taxa of the taxonomy.
 *
 * @param taxonomy A pointer on the taxonomy structure.
 * @param taxid The taxid of the taxon looked for.
 *
 * @returns A pointer on the taxon.
 * @retval NULL if the taxid is not in the merged index, if its entry has the index -1,
 *         or if the taxonomy pointer, its merged index or its taxa are NULL.
 */
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
{
    ecomerged_t* indexed_taxon;

    if (taxonomy == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get a taxon with its taxid: taxonomy pointer is NULL");
        return NULL;
    }

    if ((taxonomy->merged_idx == NULL) || (taxonomy->taxa == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get a taxon with its taxid: the taxonomy has no merged index or no taxa");
        return NULL;
    }

    // The taxid itself is passed in place of a pointer on the key
    indexed_taxon = (ecomerged_t*) bsearch((const void*) ((size_t) taxid),
                                           (const void*) taxonomy->merged_idx->merged,
                                           (taxonomy->merged_idx)->count,
                                           sizeof(ecomerged_t),
                                           cmp_taxids_in_ecomerged_t);

    if (indexed_taxon == NULL)
        return NULL;

    if (indexed_taxon->idx == -1)
        return NULL;    // TODO discuss what to do when old deleted taxon

    return (taxonomy->taxa->taxon) + (indexed_taxon->idx);
}
2017-10-04 15:55:13 +02:00
/**
 * @brief Checks whether a taxon has an ancestor with a given taxid.
 *
 * The search starts with the parent of the taxon, so the taxid of the taxon itself
 * is not considered. It stops at the first ancestor that has the given taxid, that
 * is its own parent (root node), or that is named "root".
 *
 * @param taxon A pointer on the taxon.
 * @param other_taxid The taxid looked for among the ancestors of the taxon.
 *
 * @retval 1 if an ancestor of the taxon has the given taxid.
 * @retval 0 if not.
 * @retval -1 if the taxon pointer is NULL.
 */
int obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)    // TODO discuss that this doesn't work with deprecated taxids
{
    ecotx_t* next_parent;

    if (taxon == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError checking if a taxon is under another: taxon pointer is NULL");
        return -1;
    }

    // Only read once the pointer is known not to be NULL
    next_parent = taxon->parent;

    while ((other_taxid != next_parent->taxid) &&
           (next_parent->parent != next_parent) &&    // stops at the root node whatever its name is
           ((next_parent->name == NULL) || (strcmp(next_parent->name, "root") != 0)))
        next_parent = next_parent->parent;

    if (other_taxid == next_parent->taxid)
        return 1;

    return 0;
}
/**
 * @brief Returns the first taxon having the "species" rank among a taxon and its ancestors.
 *
 * The index of the rank is obtained with rank_label_to_index(), and the search is
 * done by obi_taxo_get_parent_at_rank().
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy structure.
 *
 * @returns A pointer on the species taxon.
 * @retval NULL if there is none, or if an error occurred (NULL pointer, rank index not found).
 */
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    // Plain local: the index is looked up at each call, no state has to be shared between calls
    int32_t rankindex;

    if (taxonomy == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the species associated with a taxon: taxonomy pointer is NULL");
        return NULL;
    }

    if (taxon == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the species associated with a taxon: taxon pointer is NULL");
        return NULL;
    }

    rankindex = rank_label_to_index("species", taxonomy->ranks);
    if (rankindex < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the species associated with a taxon: error getting rank index");
        return NULL;
    }

    return obi_taxo_get_parent_at_rank(taxon, rankindex);
}
/**
 * @brief Returns the first taxon having the "genus" rank among a taxon and its ancestors.
 *
 * The index of the rank is obtained with rank_label_to_index(), and the search is
 * done by obi_taxo_get_parent_at_rank().
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy structure.
 *
 * @returns A pointer on the genus taxon.
 * @retval NULL if there is none, or if an error occurred (NULL pointer, rank index not found).
 */
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    // Plain local: the index is looked up at each call, no state has to be shared between calls
    int32_t rankindex;

    if (taxonomy == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the genus associated with a taxon: taxonomy pointer is NULL");
        return NULL;
    }

    if (taxon == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the genus associated with a taxon: taxon pointer is NULL");
        return NULL;
    }

    rankindex = rank_label_to_index("genus", taxonomy->ranks);
    if (rankindex < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the genus associated with a taxon: error getting rank index");
        return NULL;
    }

    return obi_taxo_get_parent_at_rank(taxon, rankindex);
}
/**
 * @brief Returns the first taxon having the "family" rank among a taxon and its ancestors.
 *
 * The index of the rank is obtained with rank_label_to_index(), and the search is
 * done by obi_taxo_get_parent_at_rank().
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy structure.
 *
 * @returns A pointer on the family taxon.
 * @retval NULL if there is none, or if an error occurred (NULL pointer, rank index not found).
 */
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    // Plain local: the index is looked up at each call, no state has to be shared between calls
    int32_t rankindex;

    if (taxonomy == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the family associated with a taxon: taxonomy pointer is NULL");
        return NULL;
    }

    if (taxon == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the family associated with a taxon: taxon pointer is NULL");
        return NULL;
    }

    rankindex = rank_label_to_index("family", taxonomy->ranks);
    if (rankindex < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the family associated with a taxon: error getting rank index");
        return NULL;
    }

    return obi_taxo_get_parent_at_rank(taxon, rankindex);
}
/**
 * @brief Returns the first taxon having the "kingdom" rank among a taxon and its ancestors.
 *
 * The index of the rank is obtained with rank_label_to_index(), and the search is
 * done by obi_taxo_get_parent_at_rank().
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy structure.
 *
 * @returns A pointer on the kingdom taxon.
 * @retval NULL if there is none, or if an error occurred (NULL pointer, rank index not found).
 */
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    // Plain local: the index is looked up at each call, no state has to be shared between calls
    int32_t rankindex;

    if (taxonomy == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the kingdom associated with a taxon: taxonomy pointer is NULL");
        return NULL;
    }

    if (taxon == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the kingdom associated with a taxon: taxon pointer is NULL");
        return NULL;
    }

    rankindex = rank_label_to_index("kingdom", taxonomy->ranks);
    if (rankindex < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the kingdom associated with a taxon: error getting rank index");
        return NULL;
    }

    return obi_taxo_get_parent_at_rank(taxon, rankindex);
}
/**
 * @brief Returns the first taxon having the "superkingdom" rank among a taxon and its ancestors.
 *
 * The index of the rank is obtained with rank_label_to_index(), and the search is
 * done by obi_taxo_get_parent_at_rank().
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy structure.
 *
 * @returns A pointer on the superkingdom taxon.
 * @retval NULL if there is none, or if an error occurred (NULL pointer, rank index not found).
 */
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    // Plain local: the index is looked up at each call, no state has to be shared between calls
    int32_t rankindex;

    if (taxonomy == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the superkingdom associated with a taxon: taxonomy pointer is NULL");
        return NULL;
    }

    if (taxon == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the superkingdom associated with a taxon: taxon pointer is NULL");
        return NULL;
    }

    rankindex = rank_label_to_index("superkingdom", taxonomy->ranks);
    if (rankindex < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError trying to get the superkingdom associated with a taxon: error getting rank index");
        return NULL;
    }

    return obi_taxo_get_parent_at_rank(taxon, rankindex);
}
2017-10-04 15:55:13 +02:00
/**
 * @brief Returns the label of a rank from its index in a rank structure.
 *
 * @param rank_idx The index of the rank.
 * @param ranks A pointer on the rank structure.
 *
 * @returns The label stored at that index in the rank structure (not a copy).
 * @retval NULL if the rank structure pointer is NULL or if the index is out of range.
 */
const char* obi_taxo_rank_index_to_label(int32_t rank_idx, ecorankidx_t* ranks)
{
    // Reject anything that would read outside of the label array
    if ((ranks == NULL) || (rank_idx < 0) || (rank_idx >= ranks->count))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError getting the label of a rank: NULL rank structure or rank index out of range");
        return NULL;
    }

    return (ranks->label)[rank_idx];
}