2016-03-11 13:56:38 +01:00
/********************************************************************
 * OBIDMS taxonomy functions                                        *
 ********************************************************************/

/**
 * @file obidms_taxonomy.c
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date March 2nd 2016
 * @brief Functions for reading binary taxonomy files.
 */
# include <stdlib.h>
# include <stdio.h>
# include <string.h>
# include <stdbool.h>
# include <fcntl.h>
# include "obidms_taxonomy.h"
# include "obidms.h"
2016-10-10 17:04:29 +02:00
# include "obilittlebigman.h" // TODO the function from this checking the endianness does not seem to work properly
2016-03-11 13:56:38 +01:00
# include "obidebug.h"
# include "obierrno.h"
2016-04-08 15:38:57 +02:00
# include "utils.h"
2016-03-11 13:56:38 +01:00
# define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
2016-10-10 17:04:29 +02:00
// TODO: the mallocs aren't checked, but they shouldn't exist for long because the files will be mapped instead.
// Error checking and file closing in general aren't done properly yet.
// Eventually the endianness shouldn't need checking either, as the machine will write the taxonomy with its own endianness.
/**
 * Tests the byte order of the machine running the code.
 *
 * @returns 1 if the machine is big-endian, 0 if it is little-endian.
 */
int32_t is_big_endian(void)
{
    const int32_t probe = 1;

    // On a big-endian machine the least significant byte (the one holding the 1)
    // is stored last, so the first byte in memory is 0. The previous version
    // returned that first byte directly, i.e. 1 on little-endian machines.
    return (int32_t) (((const unsigned char *) &probe)[0] == 0);
}
/**
 * Applies the SWAPINT32 macro to a 32-bit integer.
 *
 * SWAPINT32 is defined outside this file (presumably a byte-order swap -
 * TODO confirm against its definition).
 *
 * @param i The value to convert.
 * @returns The result of SWAPINT32 applied to i.
 */
int32_t swap_int32_t(int32_t i)
{
    const int32_t swapped = SWAPINT32(i);

    return swapped;
}
2016-03-11 13:56:38 +01:00
/**
 * Comparison callback used with bsearch() over an array of rank labels.
 *
 * @param label1 The searched key: a C string.
 * @param label2 A pointer on an array element, i.e. a pointer on a C string pointer.
 * @returns The strcmp() ordering of the key against the string of the element.
 */
int compareRankLabel(const void *label1, const void *label2)
{
    const char *searched  = (const char *) label1;
    const char *candidate = *(const char **) label2;

    return strcmp(searched, candidate);
}
2016-10-14 17:03:10 +02:00
/**
 * Builds the path of the directory of a taxonomy in a DMS.
 *
 * The path is the one returned by obi_dms_get_full_path() for TAXONOMY_DIR_NAME,
 * followed by '/' and the taxonomy name.
 *
 * @warning The returned string is allocated on the heap and must be freed by the caller.
 *
 * @param dms The DMS the taxonomy belongs to.
 * @param tax_name The name of the taxonomy.
 * @returns The newly allocated path, or NULL if an error occurred.
 */
char *get_taxonomy_path(OBIDMS_p dms, const char *tax_name)
{
    char  *all_tax_dir_path;
    char  *tax_path;
    size_t tax_path_size;
    int    written;

    all_tax_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME);
    if (all_tax_dir_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError getting the path of the taxonomy directory");
        return NULL;
    }

    // Room for both parts, the '/' separator and the terminating NUL
    tax_path_size = strlen(all_tax_dir_path) + strlen(tax_name) + 2;

    tax_path = (char *) malloc(tax_path_size * sizeof(char));
    if (tax_path == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxonomy path");
        free(all_tax_dir_path);
        return NULL;
    }

    written = snprintf(tax_path, tax_path_size, "%s/%s", all_tax_dir_path, tax_name);
    free(all_tax_dir_path);

    if ((written < 0) || ((size_t) written >= tax_path_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building taxonomy path");
        free(tax_path);   // was leaked on this path before
        return NULL;
    }

    return tax_path;
}
2016-03-11 13:56:38 +01:00
/**
 * Finds the index of a rank label in a rank index.
 *
 * The label array is searched with bsearch() and compareRankLabel(),
 * which requires the labels to be sorted in strcmp() order.
 *
 * @param label The rank label to look for.
 * @param ranks The rank index to search.
 * @returns The position of the label in ranks->label, or -1 if it is not found.
 */
int32_t rank_index(const char *label, ecorankidx_t *ranks)
{
    char **match = bsearch(label, ranks->label, ranks->count, sizeof(char *), compareRankLabel);

    return (match != NULL) ? (int32_t) (match - ranks->label) : -1;
}
/**
 * Reads the next length-prefixed record of a binary taxonomy file.
 *
 * A record is stored as an int32_t giving its size in bytes, followed by that many bytes.
 *
 * @warning The returned pointer refers to a static buffer that is reused (and possibly
 *          moved) by the next call: the record must be copied before another one is read,
 *          and it must not be freed by the caller.
 *
 * @param f The file to read from.
 * @param record_size Receives the size in bytes of the record that was read.
 * @returns A pointer on the record, or NULL at the end of the file or if an error
 *          occurred (obi_set_errno() is only called in the latter case).
 */
void *read_ecorecord(FILE *f, int32_t *record_size)
{
    static void   *buffer      = NULL;
    static int32_t buffer_size = 0;    // static like the buffer, so that the buffer only grows when needed
    void          *new_buffer;
    size_t         read;

    if (!record_size)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: record_size can not be NULL");
        return NULL;
    }

    read = fread(record_size, sizeof(int32_t), 1, f);

    // Reaching the end of the file is the normal way to stop, not an error
    if (feof(f))
        return NULL;

    if (read != 1)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: error reading record size");
        return NULL;
    }

    // TODO swap the bytes of *record_size on big-endian machines

    // A corrupted size must not reach realloc() or fread()
    if (*record_size <= 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: invalid record size %d", *record_size);
        return NULL;
    }

    if (buffer_size < *record_size)
    {
        // realloc(NULL, n) behaves like malloc(n); the result goes through a
        // temporary so that the current buffer is not lost on failure
        new_buffer = realloc(buffer, *record_size);
        if (new_buffer == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading a taxonomy file: error allocating memory");
            return NULL;
        }
        buffer = new_buffer;
        buffer_size = *record_size;
    }

    read = fread(buffer, *record_size, 1, f);
    if (read != 1)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: error reading a record %d, %d", (int) read, *record_size);
        return NULL;
    }

    return buffer;
}
/**
 * Reads the next taxon record of a binary taxa file into a taxon structure.
 *
 * The parent field of the taxon receives the parent *index* stored in the record,
 * cast to a pointer: the caller is expected to turn it into a real pointer.
 * The farest field is initialized to -1 and the name is copied in a newly
 * allocated, NUL-terminated string.
 *
 * @param f The taxa file to read from.
 * @param taxon The taxon structure to fill.
 * @returns taxon, or NULL if no record could be read or if an error occurred.
 */
ecotx_t *readnext_ecotaxon(FILE *f, ecotx_t *taxon)
{
    ecotxformat_t *raw;
    int32_t        record_length;

    raw = read_ecorecord(f, &record_length);
    if (!raw)
        return NULL;

    // TODO swap the bytes of name_length, parent, rank and taxid on big-endian machines

    // The name is stored inside the record, so it can not be longer than it
    if ((raw->name_length < 0) || (raw->name_length > record_length))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: invalid taxon name length");
        return NULL;
    }

    taxon->parent = (ecotx_t *) ((size_t) raw->parent);
    taxon->taxid  = raw->taxid;
    taxon->rank   = raw->rank;
    taxon->farest = -1;

    taxon->name = malloc((raw->name_length + 1) * sizeof(char));
    if (taxon->name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxon name");
        return NULL;
    }

    memcpy(taxon->name, raw->name, raw->name_length);
    // The name is not NUL-terminated in the file; the terminator was missing before
    taxon->name[raw->name_length] = '\0';

    return taxon;
}
/**
 * Opens a binary taxonomy file and reads the record count stored at its beginning.
 *
 * @param file_name The path of the file to open.
 * @param count Receives the number of records announced by the file,
 *              or 0 if NULL is returned.
 * @param abort_on_open_error If non-zero, failing to open the file is flagged as an error
 *                            with obi_set_errno(); otherwise it is silently reported
 *                            as an empty database.
 * @returns The open file, positioned right after the record count, or NULL if the file
 *          could not be opened or if the count could not be read.
 */
FILE *open_ecorecorddb(const char *file_name,
                       int32_t    *count,
                       int32_t     abort_on_open_error)
{
    FILE  *f;
    size_t read;

    f = fopen(file_name, "rb");
    if (!f)
    {
        if (abort_on_open_error)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nCouldn't open a taxonomy file");
        }
        *count = 0;
        return NULL;
    }

    read = fread(count, sizeof(int32_t), 1, f);
    if (read != 1)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading taxonomy record size");
        fclose(f);     // the stream was leaked on this path before
        *count = 0;    // never leave the count undefined for the caller
        return NULL;
    }

    // TODO swap the bytes of *count on big-endian machines

    return f;
}
ecorankidx_t * read_rankidx ( const char * ranks_file_name )
{
int32_t count ;
FILE * ranks_file ;
ecorankidx_t * ranks_index ;
int32_t i ;
int32_t rank_length ;
char * buffer ;
ranks_file = open_ecorecorddb ( ranks_file_name , & count , 0 ) ;
if ( ranks_file = = NULL )
return NULL ;
ranks_index = ( ecorankidx_t * ) malloc ( sizeof ( ecorankidx_t ) + sizeof ( char * ) * ( count - 1 ) ) ;
ranks_index - > count = count ;
for ( i = 0 ; i < count ; i + + )
{
buffer = read_ecorecord ( ranks_file , & rank_length ) ;
ranks_index - > label [ i ] = ( char * ) malloc ( rank_length + 1 ) ;
strncpy ( ranks_index - > label [ i ] , buffer , rank_length ) ;
2016-10-14 17:03:10 +02:00
( ranks_index - > label [ i ] ) [ rank_length ] = 0 ;
2016-03-11 13:56:38 +01:00
}
return ranks_index ;
}
ecotxidx_t * read_taxonomyidx ( const char * taxa_file_name , const char * local_taxa_file_name )
{
int32_t count_taxa ;
int32_t count_local_taxa ;
FILE * f_taxa ;
FILE * f_local_taxa ;
ecotxidx_t * taxa_index ;
struct ecotxnode * t ;
int32_t i ;
int32_t j ;
f_taxa = open_ecorecorddb ( taxa_file_name , & count_taxa , 0 ) ;
if ( f_taxa = = NULL )
{
obidebug ( 1 , " \n Error reading taxonomy taxa file " ) ;
return NULL ;
}
f_local_taxa = open_ecorecorddb ( local_taxa_file_name , & count_local_taxa , 0 ) ;
taxa_index = ( ecotxidx_t * ) malloc ( sizeof ( ecotxidx_t ) + sizeof ( ecotx_t ) * ( count_taxa + count_local_taxa - 1 ) ) ;
taxa_index - > count = count_taxa + count_local_taxa ;
taxa_index - > buffer_size = taxa_index - > count ;
taxa_index - > max_taxid = 0 ;
printf ( " Reading %d taxa... \n " , count_taxa ) ;
for ( i = 0 ; i < count_taxa ; i + + )
{
readnext_ecotaxon ( f_taxa , & ( taxa_index - > taxon [ i ] ) ) ;
taxa_index - > taxon [ i ] . parent = taxa_index - > taxon + ( size_t ) taxa_index - > taxon [ i ] . parent ;
taxa_index - > taxon [ i ] . parent - > farest = 0 ;
if ( taxa_index - > taxon [ i ] . taxid > taxa_index - > max_taxid )
taxa_index - > max_taxid = taxa_index - > taxon [ i ] . taxid ;
}
if ( count_local_taxa > 0 )
printf ( " Reading %d local taxa... \n " , count_local_taxa ) ;
else
printf ( " No local taxa \n " ) ;
count_taxa = taxa_index - > count ;
for ( ; i < count_taxa ; i + + ) {
readnext_ecotaxon ( f_local_taxa , & ( taxa_index - > taxon [ i ] ) ) ;
2016-10-14 17:03:10 +02:00
taxa_index - > taxon [ i ] . idx = i ;
2016-03-11 13:56:38 +01:00
taxa_index - > taxon [ i ] . parent = taxa_index - > taxon + ( size_t ) taxa_index - > taxon [ i ] . parent ;
taxa_index - > taxon [ i ] . parent - > farest = 0 ;
if ( taxa_index - > taxon [ i ] . taxid > taxa_index - > max_taxid )
taxa_index - > max_taxid = taxa_index - > taxon [ i ] . taxid ;
}
printf ( " Computing longest branches... \n " ) ;
for ( i = 0 ; i < count_taxa ; i + + )
{
t = taxa_index - > taxon + i ;
if ( t - > farest = = - 1 )
{
t - > farest = 0 ;
while ( t - > parent ! = t )
{
j = t - > farest + 1 ;
if ( j > t - > parent - > farest )
{
t - > parent - > farest = j ;
t = t - > parent ;
}
else
t = taxa_index - > taxon ;
}
}
}
return taxa_index ;
}
/**
 * Reads the next name record of a binary names file into a name structure.
 *
 * The name and its class are stored one after the other in the record, without
 * terminator; both are copied in newly allocated, NUL-terminated strings.
 * The taxid field of the record is used as an index in the taxa of the taxonomy.
 *
 * @param f The names file to read from.
 * @param name The name structure to fill.
 * @param taxonomy The taxonomy whose taxa the name refers to.
 * @returns name, or NULL if no record could be read or if an error occurred.
 */
econame_t *readnext_econame(FILE *f, econame_t *name, OBIDMS_taxonomy_p taxonomy)
{
    econameformat_t *raw;
    int32_t          record_length;

    raw = read_ecorecord(f, &record_length);
    if (!raw)
        return NULL;

    // TODO swap the bytes of is_scientific_name, name_length, class_length and taxid on big-endian machines

    // Both strings are stored inside the record and the taxon index must exist
    if ((raw->name_length < 0) || (raw->class_length < 0) ||
        (raw->name_length > record_length) ||
        (raw->class_length > (record_length - raw->name_length)) ||
        (raw->taxid < 0) || (raw->taxid >= taxonomy->taxa->count))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: invalid name record");
        return NULL;
    }

    name->is_scientific_name = raw->is_scientific_name;

    name->name = malloc((raw->name_length + 1) * sizeof(char));
    if (name->name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxon name");
        return NULL;
    }
    memcpy(name->name, raw->names, raw->name_length);
    name->name[raw->name_length] = '\0';

    name->class_name = malloc((raw->class_length + 1) * sizeof(char));
    if (name->class_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a name class");
        free(name->name);
        name->name = NULL;
        return NULL;
    }
    memcpy(name->class_name, (raw->names + raw->name_length), raw->class_length);
    name->class_name[raw->class_length] = '\0';

    name->taxon = taxonomy->taxa->taxon + raw->taxid;

    return name;
}
/**
 * Reads a binary names file into a newly allocated name index.
 *
 * @warning The returned structure and the strings of each name are allocated on the heap.
 *
 * @param file_name The path of the names file.
 * @param taxonomy The taxonomy whose taxa the names refer to (its taxa must already be read).
 * @returns The name index, or NULL if the file could not be opened or read.
 */
econameidx_t *read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
    int32_t       count;
    FILE         *f;
    econameidx_t *index_names;
    int32_t       i;
    size_t        extra_names;

    f = open_ecorecorddb(file_name, &count, 0);
    if (f == NULL)
        return NULL;

    if (count < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: invalid name count");
        fclose(f);
        return NULL;
    }

    // The allocation keeps the original 'count - 1' convention: econameidx_t
    // presumably already holds room for one name (TODO confirm in the header)
    extra_names = (count > 1) ? (size_t) (count - 1) : 0;
    index_names = (econameidx_t *) malloc(sizeof(econameidx_t) + sizeof(econame_t) * extra_names);
    if (index_names == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a name index");
        fclose(f);
        return NULL;
    }

    index_names->count = count;

    for (i = 0; i < count; i++)
    {
        if (readnext_econame(f, (index_names->names) + i, taxonomy) == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading a taxonomy names file: missing or invalid name");
            break;
        }
    }

    fclose(f);

    if (i < count)
    {
        // Reading failed at name i: release the names read so far
        while (i-- > 0)
        {
            free(index_names->names[i].name);
            free(index_names->names[i].class_name);
        }
        free(index_names);
        return NULL;
    }

    return index_names;
}
static int bcomptaxon ( const void * ptaxid , const void * ptaxon )
{
ecotx_t * current_taxon = ( ecotx_t * ) ptaxon ;
int32_t taxid = ( int32_t ) ( ( size_t ) ptaxid ) ;
return taxid - current_taxon - > taxid ;
}
/////// PUBLIC /////////
/**
 * Reads a taxonomy stored in binary files in a DMS.
 *
 * The files are looked for in the directory given by get_taxonomy_path() and are named
 * after the taxonomy: '.rdx' for the ranks, '.tdx' for the taxa, '.ldx' for the local
 * taxa and '.ndx' for the alternative names.
 *
 * @warning The returned taxonomy must be released with obi_close_taxonomy().
 *
 * @param dms The DMS the taxonomy belongs to.
 * @param taxonomy_name The name of the taxonomy.
 * @param read_alternative_names Whether the '.ndx' names file should be read.
 * @returns The taxonomy, or NULL if an error occurred.
 */
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char *taxonomy_name, bool read_alternative_names)
{
    OBIDMS_taxonomy_p tax;
    char             *taxonomy_path        = NULL;
    char             *file_name            = NULL;
    char             *local_taxa_file_name = NULL;
    int               buffer_size          = 2048;    // TODO
    int               written;

    tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
    if (tax == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxonomy structure");
        return NULL;
    }

    tax->ranks = NULL;
    tax->taxa  = NULL;
    tax->names = NULL;

    taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
    if (taxonomy_path == NULL)
        goto error;

    // file_name is reused for the ranks, taxa and names files
    file_name            = (char *) malloc(buffer_size * sizeof(char));
    local_taxa_file_name = (char *) malloc(buffer_size * sizeof(char));
    if ((file_name == NULL) || (local_taxa_file_name == NULL))
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
        goto error;
    }

    // Read ranks
    written = snprintf(file_name, buffer_size, "%s/%s.rdx", taxonomy_path, taxonomy_name);
    if ((written < 0) || (written >= buffer_size))    // Failure or truncated path
        goto path_error;

    tax->ranks = read_rankidx(file_name);
    if (tax->ranks == NULL)
        goto error;

    // Read taxa
    written = snprintf(file_name, buffer_size, "%s/%s.tdx", taxonomy_path, taxonomy_name);
    if ((written < 0) || (written >= buffer_size))
        goto path_error;

    written = snprintf(local_taxa_file_name, buffer_size, "%s/%s.ldx", taxonomy_path, taxonomy_name);
    if ((written < 0) || (written >= buffer_size))
        goto path_error;

    tax->taxa = read_taxonomyidx(file_name, local_taxa_file_name);
    if (tax->taxa == NULL)
        goto error;

    // Read alternative names
    if (read_alternative_names)
    {
        written = snprintf(file_name, buffer_size, "%s/%s.ndx", taxonomy_path, taxonomy_name);
        if ((written < 0) || (written >= buffer_size))
            goto path_error;

        tax->names = read_nameidx(file_name, tax);
        if (tax->names == NULL)
            goto error;
    }

    free(local_taxa_file_name);
    free(file_name);
    free(taxonomy_path);

    return tax;

path_error:
    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError building a binary taxonomy file name");

error:
    // Single exit for every failure, so that no path or file name is leaked
    free(local_taxa_file_name);
    free(file_name);
    free(taxonomy_path);
    obi_close_taxonomy(tax);

    return NULL;
}
/**
 * Releases the memory used by a taxonomy.
 *
 * The rank labels, the taxon names and the alternative names (with their class)
 * are freed along with the structures holding them.
 *
 * NOTE(review): this assumes that each of these strings was allocated separately
 * with malloc(), as the reading functions of this file do - confirm that no other
 * code builds taxonomies differently.
 *
 * @param taxonomy The taxonomy to release (may be NULL).
 * @returns 0 if the taxonomy was released, 1 if taxonomy is NULL.
 */
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
{
    int32_t i;

    if (taxonomy == NULL)
        return 1;

    if (taxonomy->ranks)
    {
        for (i = 0; i < taxonomy->ranks->count; i++)
            free(taxonomy->ranks->label[i]);
        free(taxonomy->ranks);
    }

    if (taxonomy->names)
    {
        for (i = 0; i < taxonomy->names->count; i++)
        {
            free(taxonomy->names->names[i].name);
            free(taxonomy->names->names[i].class_name);
        }
        free(taxonomy->names);
    }

    if (taxonomy->taxa)
    {
        for (i = 0; i < taxonomy->taxa->count; i++)
            free(taxonomy->taxa->taxon[i].name);
        free(taxonomy->taxa);
    }

    free(taxonomy);

    return 0;
}
//////////////////////////////////////////////////////////////////////////
/**
 * Finds the closest node at a given rank, starting from a taxon and walking up its parents.
 *
 * The taxon itself is returned if it already is at the given rank.
 * The walk stops at the node that is its own parent (the root).
 *
 * @param taxon The taxon to start from.
 * @param rankidx The index of the wanted rank.
 * @returns The first node at the given rank, or NULL if the root is reached without finding one.
 */
ecotx_t *obi_taxo_get_parent_at_rank(ecotx_t *taxon, int32_t rankidx)
{
    ecotx_t *node = taxon;

    while (node->rank != rankidx)
    {
        ecotx_t *upper = node->parent;

        // The root is its own parent: nothing left to visit
        if (upper == node)
            return NULL;

        node = upper;
    }

    return node;
}
/**
 * Finds the taxon with a given taxid in a taxonomy.
 *
 * The taxa are searched with bsearch() and bcomptaxon(), which requires them
 * to be sorted by taxid. The taxid is passed to the callback inside the key
 * pointer itself.
 *
 * @param taxonomy The taxonomy to search.
 * @param taxid The taxid to look for.
 * @returns The taxon, or NULL if no taxon with that taxid is found.
 */
ecotx_t *obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
{
    const void *key    = (const void *) ((size_t) taxid);
    const void *taxa   = (const void *) taxonomy->taxa->taxon;
    int32_t     n_taxa = taxonomy->taxa->count;

    return (ecotx_t *) bsearch(key, taxa, n_taxa, sizeof(ecotx_t), bcomptaxon);
}
/**
 * Tells whether a taxon is under the node with a given taxid.
 *
 * The search starts at the parent of the taxon and walks up until the taxid is found,
 * until a node named "root" is reached, or until a node that is its own parent is
 * reached. The taxid of the taxon itself is therefore not considered, unless the
 * taxon is its own parent.
 *
 * @param taxon The taxon to test.
 * @param other_taxid The taxid of the potential ancestor.
 * @returns true if an ancestor with that taxid is found, false otherwise.
 */
bool obi_taxo_is_taxon_under_taxid(ecotx_t *taxon, int32_t other_taxid)
{
    ecotx_t *next_parent = taxon->parent;

    // The self-parent test guarantees that the walk ends even if no node is named "root"
    while ((other_taxid != next_parent->taxid) &&
           (next_parent->parent != next_parent) &&
           (strcmp(next_parent->name, "root") != 0))
    {
        next_parent = next_parent->parent;
    }

    return (other_taxid == next_parent->taxid);
}
/**
 * Finds the species a taxon belongs to.
 *
 * The index of the "species" rank is looked up when a taxonomy different from the one
 * of the previous call is given, and kept in static variables for the next calls.
 *
 * @param taxon The taxon to start from.
 * @param taxonomy The taxonomy to use, or NULL to use the one of the previous call.
 * @returns The node at the species rank (see obi_taxo_get_parent_at_rank()), or NULL
 *          if there is none, if no taxonomy is known or if the rank does not exist.
 */
ecotx_t *obi_taxo_get_species(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p known_taxonomy = NULL;
    static int32_t           species_rank   = -1;

    const bool new_taxonomy = (taxonomy != NULL) && (known_taxonomy != taxonomy);

    if (new_taxonomy)
    {
        species_rank   = rank_index("species", taxonomy->ranks);
        known_taxonomy = taxonomy;
    }

    if ((known_taxonomy != NULL) && (species_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, species_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the species associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * Finds the genus a taxon belongs to.
 *
 * The index of the "genus" rank is looked up when a taxonomy different from the one
 * of the previous call is given, and kept in static variables for the next calls.
 *
 * @param taxon The taxon to start from.
 * @param taxonomy The taxonomy to use, or NULL to use the one of the previous call.
 * @returns The node at the genus rank (see obi_taxo_get_parent_at_rank()), or NULL
 *          if there is none, if no taxonomy is known or if the rank does not exist.
 */
ecotx_t *obi_taxo_get_genus(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p known_taxonomy = NULL;
    static int32_t           genus_rank     = -1;

    const bool new_taxonomy = (taxonomy != NULL) && (known_taxonomy != taxonomy);

    if (new_taxonomy)
    {
        genus_rank     = rank_index("genus", taxonomy->ranks);
        known_taxonomy = taxonomy;
    }

    if ((known_taxonomy != NULL) && (genus_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, genus_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the genus associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * Finds the family a taxon belongs to.
 *
 * The index of the "family" rank is looked up when a taxonomy different from the one
 * of the previous call is given, and kept in static variables for the next calls.
 *
 * @param taxon The taxon to start from.
 * @param taxonomy The taxonomy to use, or NULL to use the one of the previous call.
 * @returns The node at the family rank (see obi_taxo_get_parent_at_rank()), or NULL
 *          if there is none, if no taxonomy is known or if the rank does not exist.
 */
ecotx_t *obi_taxo_get_family(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p known_taxonomy = NULL;
    static int32_t           family_rank    = -1;

    const bool new_taxonomy = (taxonomy != NULL) && (known_taxonomy != taxonomy);

    if (new_taxonomy)
    {
        family_rank    = rank_index("family", taxonomy->ranks);
        known_taxonomy = taxonomy;
    }

    if ((known_taxonomy != NULL) && (family_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, family_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the family associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * Finds the kingdom a taxon belongs to.
 *
 * The index of the "kingdom" rank is looked up when a taxonomy different from the one
 * of the previous call is given, and kept in static variables for the next calls.
 *
 * @param taxon The taxon to start from.
 * @param taxonomy The taxonomy to use, or NULL to use the one of the previous call.
 * @returns The node at the kingdom rank (see obi_taxo_get_parent_at_rank()), or NULL
 *          if there is none, if no taxonomy is known or if the rank does not exist.
 */
ecotx_t *obi_taxo_get_kingdom(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p known_taxonomy = NULL;
    static int32_t           kingdom_rank   = -1;

    const bool new_taxonomy = (taxonomy != NULL) && (known_taxonomy != taxonomy);

    if (new_taxonomy)
    {
        kingdom_rank   = rank_index("kingdom", taxonomy->ranks);
        known_taxonomy = taxonomy;
    }

    if ((known_taxonomy != NULL) && (kingdom_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, kingdom_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the kingdom associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * Finds the superkingdom a taxon belongs to.
 *
 * The index of the "superkingdom" rank is looked up when a taxonomy different from the
 * one of the previous call is given, and kept in static variables for the next calls.
 *
 * @param taxon The taxon to start from.
 * @param taxonomy The taxonomy to use, or NULL to use the one of the previous call.
 * @returns The node at the superkingdom rank (see obi_taxo_get_parent_at_rank()), or NULL
 *          if there is none, if no taxonomy is known or if the rank does not exist.
 */
ecotx_t *obi_taxo_get_superkingdom(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p known_taxonomy    = NULL;
    static int32_t           superkingdom_rank = -1;

    const bool new_taxonomy = (taxonomy != NULL) && (known_taxonomy != taxonomy);

    if (new_taxonomy)
    {
        superkingdom_rank = rank_index("superkingdom", taxonomy->ranks);
        known_taxonomy    = taxonomy;
    }

    if ((known_taxonomy != NULL) && (superkingdom_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, superkingdom_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the superkingdom associated with a taxon: No taxonomy defined");
    return NULL;
}
2016-10-14 17:03:10 +02:00
// Functions to write taxonomy structure to binary files
/**
 * Writes the rank index of a taxonomy to its binary '.rdx' file.
 *
 * The file holds the rank count, then for each rank the length of its label
 * followed by the label itself (without terminator), all integers being int32_t.
 * The file is created in the directory given by get_taxonomy_path() and must
 * not already exist.
 *
 * @param dms The DMS the taxonomy belongs to.
 * @param tax The taxonomy whose ranks are written.
 * @param taxonomy_name The name of the taxonomy, used to name the file.
 * @returns 0 if the file was written, -1 if an error occurred.
 */
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char *taxonomy_name)   // TODO prefix in taxonomy struct?
{
    const ssize_t int_size        = (ssize_t) sizeof(int32_t);
    int           i;
    int           status          = -1;
    int           written;
    int           file_descriptor = -1;
    char         *file_name       = NULL;
    char         *taxonomy_path   = NULL;
    size_t        file_name_size;
    off_t         file_size;
    int32_t       length;

    if ((tax == NULL) || (tax->ranks == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError writing a binary taxonomy file: no ranks to write");
        return -1;
    }

    // Compute file size
    file_size = sizeof(int32_t);    // To store the rank count
    for (i = 0; i < (tax->ranks)->count; i++)
    {
        file_size = file_size + sizeof(int32_t);                    // To store label size
        file_size = file_size + strlen(((tax->ranks)->label)[i]);   // To store label
    }

    // Build the taxonomy directory path
    taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
    if (taxonomy_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building the path of a taxonomy directory");
        goto cleanup;
    }

    // Room for the '/' (1), the ".rdx" extension (4) and the terminating NUL (1):
    // the buffer used to be one byte too short
    file_name_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
    file_name = (char *) malloc(file_name_size * sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
        goto cleanup;
    }

    // Build the file path
    written = snprintf(file_name, file_name_size, "%s/%s.rdx", taxonomy_path, taxonomy_name);
    if ((written < 0) || ((size_t) written >= file_name_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        goto cleanup;
    }

    // Create file
    file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (file_descriptor < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError creating a binary taxonomy file");
        goto cleanup;
    }

    // Truncate the file to the right size
    if (ftruncate(file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError truncating a binary taxonomy file");
        goto cleanup;
    }

    // Write rank count
    if (write(file_descriptor, &((tax->ranks)->count), sizeof(int32_t)) < int_size)
        goto write_error;

    // Write ranks
    for (i = 0; i < (tax->ranks)->count; i++)
    {
        length = strlen(((tax->ranks)->label)[i]);

        // Write rank size
        if (write(file_descriptor, &length, sizeof(int32_t)) < int_size)
            goto write_error;

        // Write rank label
        if (write(file_descriptor, ((tax->ranks)->label)[i], length) < ((ssize_t) length))
            goto write_error;
    }

    // Close file
    if (close(file_descriptor) < 0)
    {
        file_descriptor = -1;   // Do not try to close it again
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a binary taxonomy file");
        goto cleanup;
    }
    file_descriptor = -1;

    status = 0;
    goto cleanup;

write_error:
    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError writing in a binary taxonomy file");

cleanup:
    // Single exit, so that the descriptor and both strings are always released
    if (file_descriptor >= 0)
        close(file_descriptor);
    free(file_name);
    free(taxonomy_path);

    return status;
}
/**
 * Writes the taxa of a taxonomy to its binary '.tdx' file.
 *
 * The file holds the taxon count, then for each taxon: the record size, the taxid,
 * the rank index, the index (idx field) of the parent taxon, the name length and
 * the name itself (without terminator), all integers being int32_t. The record
 * size does not count itself.
 * The file is created in the directory given by get_taxonomy_path() and must
 * not already exist.
 *
 * @param dms The DMS the taxonomy belongs to.
 * @param tax The taxonomy whose taxa are written.
 * @param taxonomy_name The name of the taxonomy, used to name the file.
 * @returns 0 if the file was written, -1 if an error occurred.
 */
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char *taxonomy_name)   // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
    const ssize_t int_size        = (ssize_t) sizeof(int32_t);
    int           i;
    int           status          = -1;
    int           written;
    int           file_descriptor = -1;
    char         *file_name       = NULL;
    char         *taxonomy_path   = NULL;
    size_t        file_name_size;
    off_t         file_size;
    int32_t       name_length;
    int32_t       record_size;
    ecotx_t      *taxon;

    if ((tax == NULL) || (tax->taxa == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError writing a binary taxonomy file: no taxa to write");
        return -1;
    }

    // Compute file size
    file_size = sizeof(int32_t);    // To store record count
    for (i = 0; i < (tax->taxa)->count; i++)
    {
        file_size = file_size + sizeof(int32_t) * 5;                // To store record size, taxid, rank index, parent index, and name length
        file_size = file_size + strlen(tax->taxa->taxon[i].name);   // To store name
    }

    // Build the taxonomy directory path
    taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
    if (taxonomy_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building the path of a taxonomy directory");
        goto cleanup;
    }

    // Room for the '/' (1), the ".tdx" extension (4) and the terminating NUL (1):
    // the buffer used to be one byte too short
    file_name_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
    file_name = (char *) malloc(file_name_size * sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
        goto cleanup;
    }

    // Build the file path
    written = snprintf(file_name, file_name_size, "%s/%s.tdx", taxonomy_path, taxonomy_name);
    if ((written < 0) || ((size_t) written >= file_name_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        goto cleanup;
    }

    // Create file
    file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (file_descriptor < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError creating a binary taxonomy file");
        goto cleanup;
    }

    // Truncate the file to the right size
    if (ftruncate(file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError truncating a binary taxonomy file");
        goto cleanup;
    }

    // Write record count
    if (write(file_descriptor, &(tax->taxa->count), sizeof(int32_t)) < int_size)
        goto write_error;

    // Write records
    for (i = 0; i < tax->taxa->count; i++)
    {
        taxon       = &(tax->taxa->taxon[i]);
        name_length = strlen(taxon->name);
        record_size = 4 * sizeof(int32_t) + name_length;    // taxid, rank, parent and name length, then the name

        // Write record size
        if (write(file_descriptor, &record_size, sizeof(int32_t)) < int_size)
            goto write_error;

        // Write taxid
        if (write(file_descriptor, &(taxon->taxid), sizeof(int32_t)) < int_size)
            goto write_error;

        // Write rank index
        if (write(file_descriptor, &(taxon->rank), sizeof(int32_t)) < int_size)
            goto write_error;

        // Write parent index
        if (write(file_descriptor, &((taxon->parent)->idx), sizeof(int32_t)) < int_size)
            goto write_error;

        // Write name length
        if (write(file_descriptor, &name_length, sizeof(int32_t)) < int_size)
            goto write_error;

        // Write name
        if (write(file_descriptor, taxon->name, name_length) < ((ssize_t) name_length))
            goto write_error;
    }

    // Close file
    if (close(file_descriptor) < 0)
    {
        file_descriptor = -1;   // Do not try to close it again
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a binary taxonomy file");
        goto cleanup;
    }
    file_descriptor = -1;

    status = 0;
    goto cleanup;

write_error:
    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError writing in a binary taxonomy file");

cleanup:
    // Single exit, so that the descriptor and both strings are always released
    if (file_descriptor >= 0)
        close(file_descriptor);
    free(file_name);
    free(taxonomy_path);

    return status;
}
/**
 * Writes the alternative names of a taxonomy to its binary '.ndx' file.
 *
 * The file holds the name count, then for each name: the record size, whether it is
 * a scientific name, the name length, the class length, the index (idx field) of the
 * taxon it refers to, the name and the class (both without terminator), all integers
 * being int32_t. The record size does not count itself.
 * The file is created in the directory given by get_taxonomy_path() and must
 * not already exist.
 *
 * @param dms The DMS the taxonomy belongs to.
 * @param tax The taxonomy whose names are written.
 * @param taxonomy_name The name of the taxonomy, used to name the file.
 * @returns 0 if the file was written, -1 if an error occurred
 *          (including when the taxonomy holds no name index).
 */
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char *taxonomy_name)   // TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
{
    const ssize_t int_size        = (ssize_t) sizeof(int32_t);
    int           i;
    int           status          = -1;
    int           written;
    int           file_descriptor = -1;
    char         *file_name       = NULL;
    char         *taxonomy_path   = NULL;
    size_t        file_name_size;
    off_t         file_size;
    int32_t       name_length;
    int32_t       class_length;
    int32_t       record_size;
    econame_t    *name;

    // The name index is NULL when the taxonomy was read without its alternative names
    if ((tax == NULL) || (tax->names == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError writing a binary taxonomy file: no names to write");
        return -1;
    }

    // Compute file size
    file_size = sizeof(int32_t);    // To store record count
    for (i = 0; i < (tax->names)->count; i++)
    {
        file_size = file_size + sizeof(int32_t) * 5;                        // To store record size, scientific name flag, name length, class length, and taxon index
        file_size = file_size + strlen(tax->names->names[i].name);          // To store name
        file_size = file_size + strlen(tax->names->names[i].class_name);    // To store class
    }

    // Build the taxonomy directory path
    taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
    if (taxonomy_path == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building the path of a taxonomy directory");
        goto cleanup;
    }

    // Room for the '/' (1), the ".ndx" extension (4) and the terminating NUL (1):
    // the buffer used to be one byte too short
    file_name_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
    file_name = (char *) malloc(file_name_size * sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
        goto cleanup;
    }

    // Build the file path
    written = snprintf(file_name, file_name_size, "%s/%s.ndx", taxonomy_path, taxonomy_name);
    if ((written < 0) || ((size_t) written >= file_name_size))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError building a binary taxonomy file name");
        goto cleanup;
    }

    // Create file
    file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (file_descriptor < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError creating a binary taxonomy file");
        goto cleanup;
    }

    // Truncate the file to the right size
    if (ftruncate(file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError truncating a binary taxonomy file");
        goto cleanup;
    }

    // Write record count
    if (write(file_descriptor, &(tax->names->count), sizeof(int32_t)) < int_size)
        goto write_error;

    // Write records
    for (i = 0; i < tax->names->count; i++)
    {
        name         = &(tax->names->names[i]);
        name_length  = strlen(name->name);
        class_length = strlen(name->class_name);
        record_size  = 4 * sizeof(int32_t) + name_length + class_length;

        // Write record size
        if (write(file_descriptor, &record_size, sizeof(int32_t)) < int_size)
            goto write_error;

        // Write if the name is a scientific name
        if (write(file_descriptor, &(name->is_scientific_name), sizeof(int32_t)) < int_size)
            goto write_error;

        // Write name length
        if (write(file_descriptor, &name_length, sizeof(int32_t)) < int_size)
            goto write_error;

        // Write class length
        if (write(file_descriptor, &class_length, sizeof(int32_t)) < int_size)
            goto write_error;

        // Write taxid index
        if (write(file_descriptor, &(name->taxon->idx), sizeof(int32_t)) < int_size)
            goto write_error;

        // Write name
        if (write(file_descriptor, name->name, name_length) < ((ssize_t) name_length))
            goto write_error;

        // Write class
        if (write(file_descriptor, name->class_name, class_length) < ((ssize_t) class_length))
            goto write_error;
    }

    // Close file
    if (close(file_descriptor) < 0)
    {
        file_descriptor = -1;   // Do not try to close it again
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a binary taxonomy file");
        goto cleanup;
    }
    file_descriptor = -1;

    status = 0;
    goto cleanup;

write_error:
    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError writing in a binary taxonomy file");

cleanup:
    // Single exit, so that the descriptor and both strings are always released
    if (file_descriptor >= 0)
        close(file_descriptor);
    free(file_name);
    free(taxonomy_path);

    return status;
}