2018-12-09 19:19:32 +01:00
/****************************************************************************
* Taxonomic assignment of sequences *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* @ file obi_ecotag . c
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
* @ date November 15 th 2018
* @ brief Functions for the taxonomic assignment of sequences .
*/
//#define OMP_SUPPORT // TODO
# ifdef OMP_SUPPORT
# include <omp.h>
# endif
# include <stdlib.h>
# include <stdio.h>
# include <stdbool.h>
# include <search.h>
# include <sys/time.h>
# include "obi_ecotag.h"
# include "obidms_taxonomy.h"
# include "obidms.h"
# include "obidebug.h"
# include "obierrno.h"
2019-09-21 16:47:22 +02:00
# include "obisig.h"
2018-12-09 19:19:32 +01:00
# include "obitypes.h"
# include "obiview.h"
# include "obidmscolumn.h"
# include "sse_banded_LCS_alignment.h"
# include "upperband.h"
# include "obiblob.h"
# include "build_reference_db.h"
2019-08-31 18:30:06 +02:00
# include "libjson/json_utils.h"
2018-12-09 19:19:32 +01:00
# define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**************************************************************************
*
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* Internal function creating the output columns for the ecotag algorithm .
*
* @ param o_view A pointer on the output view .
*
* @ retval 0 if the operation was successfully completed .
* @ retval - 1 if an error occurred .
*
* @ since December 2018
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int create_output_columns ( Obiview_p o_view ) ;
/**
* @ brief Internal function printing the result of one assignment to the output view .
*
* @ param output_view A pointer on the writable view where the result should be written .
* @ param line The line in the output view where the result should be written .
* @ param assigned_taxid_column A pointer on the column where the assigned taxid should be written .
* @ param taxid The assigned taxid .
* @ param assigned_name_column A pointer on the column where the assigned scientific name should be written .
* @ param name The assigned scientific name .
* @ param assigned_status_column A pointer on the column where the assigned status should be written .
* @ param assigned The assigned status ( whether the sequence was assigned to a taxon or not ) .
* @ param best_match_column A pointer on the column where the list of ids of the best matches should be written .
* @ param best_match_ids The list of ids of the best matches as an array of the concatenated ids separated by ' \0 ' .
* @ param best_match_ids_length The total length of the array of ids of best matches .
* @ param score_column A pointer on the column where the score should be written .
* @ param score The similarity score of the sequence with its best match ( es ) .
*
* @ retval 0 if the operation was successfully completed .
* @ retval - 1 if an error occurred .
*
* @ since December 2016
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
int print_assignment_result ( Obiview_p output_view , index_t line ,
OBIDMS_column_p assigned_taxid_column , int32_t taxid ,
OBIDMS_column_p assigned_name_column , const char * name ,
OBIDMS_column_p assigned_status_column , bool assigned ,
OBIDMS_column_p best_match_column , const char * best_match_ids , int best_match_ids_length ,
OBIDMS_column_p score_column , double score ) ;
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static int create_output_columns ( Obiview_p o_view )
{
// Score column
2020-02-12 14:23:17 +01:00
if ( obi_view_add_column ( o_view , ECOTAG_SCORE_COLUMN_NAME , - 1 , NULL , OBI_FLOAT , 0 , 1 , NULL , false , false , false , NULL , NULL , - 1 , " {} " , true ) < 0 )
2018-12-09 19:19:32 +01:00
{
obidebug ( 1 , " \n Error creating the column for the score in ecotag " ) ;
return - 1 ;
}
// Assigned taxid column
2020-02-12 14:23:17 +01:00
if ( obi_view_add_column ( o_view , ECOTAG_TAXID_COLUMN_NAME , - 1 , NULL , OBI_INT , 0 , 1 , NULL , false , false , false , NULL , NULL , - 1 , " {} " , true ) < 0 )
2018-12-09 19:19:32 +01:00
{
obidebug ( 1 , " \n Error creating the column for the assigned taxid in ecotag " ) ;
return - 1 ;
}
// Assigned scientific name column
2020-02-12 14:23:17 +01:00
if ( obi_view_add_column ( o_view , ECOTAG_NAME_COLUMN_NAME , - 1 , NULL , OBI_STR , 0 , 1 , NULL , false , false , false , NULL , NULL , - 1 , " {} " , true ) < 0 )
2018-12-09 19:19:32 +01:00
{
obidebug ( 1 , " \n Error creating the column for the assigned scientific name in ecotag " ) ;
return - 1 ;
}
// Assignement status column
2020-02-12 14:23:17 +01:00
if ( obi_view_add_column ( o_view , ECOTAG_STATUS_COLUMN_NAME , - 1 , NULL , OBI_BOOL , 0 , 1 , NULL , false , false , false , NULL , NULL , - 1 , " {} " , true ) < 0 )
2018-12-09 19:19:32 +01:00
{
obidebug ( 1 , " \n Error creating the column for the assignment status in ecotag " ) ;
return - 1 ;
}
// Column for array of best match ids
2020-02-12 14:23:17 +01:00
if ( obi_view_add_column ( o_view , ECOTAG_BEST_MATCH_IDS_COLUMN_NAME , - 1 , NULL , OBI_STR , 0 , 1 , NULL , false , true , false , NULL , NULL , - 1 , " {} " , true ) < 0 )
2018-12-09 19:19:32 +01:00
{
obidebug ( 1 , " \n Error creating the column for the array of ids of the best match in ecotag " ) ;
return - 1 ;
}
return 0 ;
}
int print_assignment_result ( Obiview_p output_view , index_t line ,
OBIDMS_column_p assigned_taxid_column , int32_t taxid ,
OBIDMS_column_p assigned_name_column , const char * name ,
OBIDMS_column_p assigned_status_column , bool assigned ,
OBIDMS_column_p best_match_column , const char * best_match_ids , int best_match_ids_length ,
OBIDMS_column_p score_column , double score )
{
// Write the assigned taxid
if ( obi_set_int_with_elt_idx_and_col_p_in_view ( output_view , assigned_taxid_column , line , 0 , taxid ) < 0 )
{
obidebug ( 1 , " \n Error writing a taxid in a column when writing ecotag results " ) ;
return - 1 ;
}
// Write the assigned scientific name
if ( obi_set_str_with_elt_idx_and_col_p_in_view ( output_view , assigned_name_column , line , 0 , name ) < 0 )
{
obidebug ( 1 , " \n Error writing a scientific name in a column when writing ecotag results " ) ;
return - 1 ;
}
// Write the assigned status
if ( obi_set_bool_with_elt_idx_and_col_p_in_view ( output_view , assigned_status_column , line , 0 , assigned ) < 0 )
{
obidebug ( 1 , " \n Error writing a assignment status in a column when writing ecotag results " ) ;
return - 1 ;
}
// Write the best match ids
if ( obi_set_array_with_col_p_in_view ( output_view , best_match_column , line , best_match_ids , ( uint8_t ) ( sizeof ( char ) * 8 ) , best_match_ids_length ) < 0 )
{
obidebug ( 1 , " \n Error writing a assignment status in a column when writing ecotag results " ) ;
return - 1 ;
}
// Write the similarity score with the best match
if ( obi_set_float_with_elt_idx_and_col_p_in_view ( output_view , score_column , line , 0 , score ) < 0 )
{
obidebug ( 1 , " \n Error writing a score in a column when writing ecotag results " ) ;
return - 1 ;
}
return 0 ;
}
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
int obi_ecotag ( const char * dms_name ,
const char * query_view_name ,
const char * ref_dms_name ,
const char * ref_view_name ,
const char * taxo_dms_name ,
const char * taxonomy_name ,
const char * output_view_name ,
const char * output_view_comments ,
2019-08-31 18:30:06 +02:00
double ecotag_threshold ) // TODO different threshold for the similarity sphere around ref seqs
2018-12-09 19:19:32 +01:00
{
// For each sequence
// Align with DB
// Keep the indices of all max scores
// For each kept index, get the LCA at threshold, then the LCA of those LCAs
// Write result (max score, threshold, LCA assigned, list of the ids of the best matches)
index_t i , j , k ;
ecotx_t * lca ;
ecotx_t * lca_in_array ;
ecotx_t * best_match ;
index_t query_seq_idx , ref_seq_idx ;
double score , best_score ;
double threshold ;
int lcs_length ;
int ali_length ;
Kmer_table_p ktable ;
Obi_blob_p blob1 ;
Obi_blob_p blob2 ;
int lcs_min ;
index_t query_count ;
index_t ref_count ;
bool same_indexer ;
const double * score_array ;
const int * lca_array ;
index_t * best_match_array ;
char * best_match_ids ;
2019-07-06 16:31:19 +02:00
char * best_match_ids_to_store ;
2018-12-09 19:19:32 +01:00
int32_t best_match_ids_length ;
int best_match_count ;
int buffer_size ;
int best_match_ids_buffer_size ;
index_t best_match_idx ;
int32_t lca_array_length ;
int32_t lca_taxid ;
int32_t taxid_best_match ;
bool assigned ;
const char * lca_name ;
const char * id ;
int id_len ;
OBIDMS_p dms = NULL ;
OBIDMS_p ref_dms = NULL ;
OBIDMS_p taxo_dms = NULL ;
OBIDMS_taxonomy_p taxonomy = NULL ;
Obiview_p query_view = NULL ;
Obiview_p ref_view = NULL ;
Obiview_p output_view = NULL ;
OBIDMS_column_p query_seq_column = NULL ;
OBIDMS_column_p ref_seq_column = NULL ;
OBIDMS_column_p ref_id_column = NULL ;
OBIDMS_column_p score_column = NULL ;
OBIDMS_column_p assigned_taxid_column = NULL ;
OBIDMS_column_p assigned_name_column = NULL ;
OBIDMS_column_p assigned_status_column = NULL ;
OBIDMS_column_p best_match_column = NULL ;
OBIDMS_column_p lca_taxid_a_column = NULL ;
OBIDMS_column_p score_a_column = NULL ;
OBIDMS_column_p ref_taxid_column = NULL ;
2019-08-31 18:30:06 +02:00
char * db_threshold_str = NULL ;
double db_threshold ;
2018-12-09 19:19:32 +01:00
buffer_size = 1024 ;
best_match_ids_buffer_size = 1024 ;
2019-09-21 16:47:22 +02:00
signal ( SIGINT , sig_handler ) ;
2018-12-09 19:19:32 +01:00
// Open main DMS containing the query sequences and where the output will be written
2019-09-20 20:37:19 +02:00
dms = obi_open_dms ( dms_name , false ) ;
2018-12-09 19:19:32 +01:00
if ( dms = = NULL )
{
obidebug ( 1 , " \n Error opening the DMS containing the query sequences for ecotag " ) ;
return - 1 ;
}
// Open the DMS containing the reference database (can be the same or not)
2019-09-20 20:37:19 +02:00
ref_dms = obi_open_dms ( ref_dms_name , false ) ;
2018-12-09 19:19:32 +01:00
if ( ref_dms = = NULL )
{
obidebug ( 1 , " \n Error opening the DMS containing the reference database for ecotag " ) ;
return - 1 ;
}
// Open the DMS containing the taxonomy (can be the same or not)
2019-09-20 20:37:19 +02:00
taxo_dms = obi_open_dms ( taxo_dms_name , false ) ;
2018-12-09 19:19:32 +01:00
if ( taxo_dms = = NULL )
{
obidebug ( 1 , " \n Error opening the DMS containing the taxonomy for ecotag " ) ;
return - 1 ;
}
// Open the taxonomy
taxonomy = obi_read_taxonomy ( taxo_dms , taxonomy_name , 0 ) ;
if ( taxonomy = = NULL )
{
obidebug ( 1 , " \n Error opening the taxonomy for the assignment of sequences " ) ;
return - 1 ;
}
// Open the query view
query_view = obi_open_view ( dms , query_view_name ) ;
if ( query_view = = NULL )
{
obidebug ( 1 , " \n Error opening the view of query sequences to assign " ) ;
return - 1 ;
}
// Open the reference view.
ref_view = obi_open_view ( ref_dms , ref_view_name ) ;
if ( ref_view = = NULL )
{
obidebug ( 1 , " \n Error opening the view of reference sequences to assign " ) ;
return - 1 ;
}
// Open the column of query sequences to assign
if ( strcmp ( ( query_view - > infos ) - > view_type , VIEW_TYPE_NUC_SEQS ) = = 0 )
query_seq_column = obi_view_get_column ( query_view , NUC_SEQUENCE_COLUMN ) ;
else
{
obi_set_errno ( OBI_ECOTAG_ERROR ) ;
obidebug ( 1 , " \n Error: query view must have the type NUC_SEQS view " ) ;
return - 1 ;
}
if ( query_seq_column = = NULL )
{
obidebug ( 1 , " \n Error getting the query column to assign " ) ;
return - 1 ;
}
2019-08-31 18:30:06 +02:00
// Open the column of reference sequences to compare the query sequences to
2018-12-09 19:19:32 +01:00
if ( strcmp ( ( ref_view - > infos ) - > view_type , VIEW_TYPE_NUC_SEQS ) = = 0 )
ref_seq_column = obi_view_get_column ( ref_view , NUC_SEQUENCE_COLUMN ) ;
else
{
obi_set_errno ( OBI_ECOTAG_ERROR ) ;
obidebug ( 1 , " \n Error: reference view must have the type NUC_SEQS view " ) ;
return - 1 ;
}
if ( ref_seq_column = = NULL )
{
obidebug ( 1 , " \n Error getting the reference column to assign " ) ;
return - 1 ;
}
2019-08-31 18:30:06 +02:00
// Check if the demanded threshold is lower than the threshold used to build the reference database
db_threshold_str = obi_read_comment ( ( ref_view - > infos ) - > comments , DB_THRESHOLD_KEY_IN_COMMENTS ) ;
if ( db_threshold_str = = NULL )
{
obidebug ( 1 , " \n Error reading the threshold used to build the reference database. " ) ;
return - 1 ;
}
if ( sscanf ( db_threshold_str + 1 , " %lf " , & db_threshold ) < = 0 )
{
obidebug ( 1 , " \n Error reading the threshold used to build the reference database. " ) ;
return - 1 ;
}
free ( db_threshold_str ) ;
if ( ecotag_threshold < db_threshold )
2019-09-15 19:27:47 +02:00
{
fprintf ( stderr , " \n Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f). \n \n " ,
2019-08-31 18:30:06 +02:00
ecotag_threshold , db_threshold ) ;
2019-09-15 19:27:47 +02:00
return - 1 ;
}
2019-08-31 18:30:06 +02:00
2018-12-09 19:19:32 +01:00
// Open the ID column of reference sequences
ref_id_column = obi_view_get_column ( ref_view , ID_COLUMN ) ;
if ( ref_id_column = = NULL )
{
obidebug ( 1 , " \n Error getting the ID column of the reference view " ) ;
return - 1 ;
}
// Create the output view
output_view = obi_clone_view ( dms , query_view , output_view_name , NULL , output_view_comments ) ;
if ( output_view = = NULL )
{
obidebug ( 1 , " \n Error creating the output view when assigning " ) ;
return - 1 ;
}
// Create the output columns
if ( create_output_columns ( output_view ) < 0 )
return - 1 ;
assigned_taxid_column = obi_view_get_column ( output_view , ECOTAG_TAXID_COLUMN_NAME ) ;
assigned_name_column = obi_view_get_column ( output_view , ECOTAG_NAME_COLUMN_NAME ) ;
assigned_status_column = obi_view_get_column ( output_view , ECOTAG_STATUS_COLUMN_NAME ) ;
best_match_column = obi_view_get_column ( output_view , ECOTAG_BEST_MATCH_IDS_COLUMN_NAME ) ;
score_column = obi_view_get_column ( output_view , ECOTAG_SCORE_COLUMN_NAME ) ;
// Open the used reference columns
lca_taxid_a_column = obi_view_get_column ( ref_view , LCA_TAXID_ARRAY_COLUMN_NAME ) ;
if ( lca_taxid_a_column = = NULL )
{
obidebug ( 1 , " \n Error opening the reference LCA taxid array column when doing the taxonomic assignment of sequences " ) ;
return - 1 ;
}
score_a_column = obi_view_get_column ( ref_view , LCA_SCORE_ARRAY_COLUMN_NAME ) ;
if ( score_a_column = = NULL )
{
obidebug ( 1 , " \n Error opening the score array column when doing the taxonomic assignment of sequences " ) ;
return - 1 ;
}
ref_taxid_column = obi_view_get_column ( ref_view , TAXID_COLUMN ) ;
if ( ref_taxid_column = = NULL )
{
obidebug ( 1 , " \n Error opening the taxid column when doing the taxonomic assignment of sequences " ) ;
return - 1 ;
}
// Check if the sequence columns share the same indexer (allows for quick checking of sequence equality)
2019-07-23 19:02:17 +02:00
if ( ( strcmp ( ( query_seq_column - > header ) - > indexer_name , ( ref_seq_column - > header ) - > indexer_name ) = = 0 ) & & ( dms = = ref_dms ) )
2018-12-09 19:19:32 +01:00
same_indexer = true ;
else
same_indexer = false ;
// Build kmer tables
ktable = hash_two_seq_columns ( query_view , query_seq_column , 0 , ref_view , ref_seq_column , 0 ) ;
if ( ktable = = NULL )
{
obi_set_errno ( OBI_ECOTAG_ERROR ) ;
obidebug ( 1 , " \n Error building kmer tables before assigning " ) ;
return - 1 ;
}
query_count = ( query_view - > infos ) - > line_count ;
ref_count = ( ref_view - > infos ) - > line_count ;
best_match_array = ( index_t * ) malloc ( buffer_size * sizeof ( index_t ) ) ;
if ( best_match_array = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating memory for the best match index array in ecotag " ) ;
return - 1 ;
}
best_match_ids = ( char * ) malloc ( best_match_ids_buffer_size * sizeof ( char ) ) ;
if ( best_match_ids = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error allocating memory for the best match id array in ecotag " ) ;
return - 1 ;
}
for ( i = 0 ; i < query_count ; i + + )
{
2020-04-28 15:10:07 +02:00
if ( i % 1000 = = 0 )
2018-12-09 19:19:32 +01:00
fprintf ( stderr , " \r Done : %f %% " , ( i / ( float ) query_count ) * 100 ) ;
best_match_count = 0 ;
best_match_ids_length = 0 ;
threshold = ecotag_threshold ;
best_score = 0.0 ;
// Get first sequence and its index
query_seq_idx = obi_get_index_with_elt_idx_and_col_p_in_view ( query_view , query_seq_column , i , 0 ) ;
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view ( query_view , query_seq_column , i , 0 ) ;
if ( blob1 = = NULL )
{
obidebug ( 1 , " \n Error retrieving sequences to align when assigning " ) ;
return - 1 ;
}
for ( j = 0 ; j < ref_count ; j + + )
{
2019-09-21 16:47:22 +02:00
if ( ! keep_running )
return - 1 ;
2018-12-09 19:19:32 +01:00
// Get reference sequence and its index
ref_seq_idx = obi_get_index_with_elt_idx_and_col_p_in_view ( ref_view , ref_seq_column , j , 0 ) ;
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view ( ref_view , ref_seq_column , j , 0 ) ;
if ( blob2 = = NULL )
{
obidebug ( 1 , " \n Error retrieving sequences to align when assigning " ) ;
return - 1 ;
}
// Check if the sequences are identical in a quick way (same index in the same indexer)
if ( same_indexer & & ( query_seq_idx = = ref_seq_idx ) )
score = 1.0 ;
else // the sequences aren't identical or we don't know
{
// kmer filter (offset for the index of the kmer table of the 2nd sequence because the kmer tables of both the query and ref columns are concatenated in one)
align_filters ( ktable , blob1 , blob2 , i , query_count + j , threshold , true , ALILEN , true , & score , & lcs_min , ! same_indexer ) ;
// Compute alignment score
if ( ( score < 0 ) & & ( ( threshold = = 0 ) | | ( score = = - 1.0 ) ) ) // (sequences are not identical), and (no threshold, or filter passed): align
score = obiblob_sse_banded_lcs_align ( blob1 , blob2 , threshold , true , ALILEN , true , & lcs_length , & ali_length ) ;
}
if ( ( score > = threshold ) & & ( score > = best_score ) )
{
// Replace everything if score is better
if ( score > best_score )
{
// Up the alignment threshold because we only want scores equal or greater than the best found
threshold = score ;
best_score = score ;
// Reset the array with that match
best_match_ids_length = 0 ;
best_match_count = 0 ;
}
// Store in best match array
// Grow match array if needed
if ( best_match_count = = buffer_size )
{
buffer_size = buffer_size * 2 ;
best_match_array = ( index_t * ) realloc ( best_match_array , buffer_size * sizeof ( index_t ) ) ;
if ( best_match_array = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error reallocating match array when assigning " ) ;
return - 1 ;
}
}
id = obi_get_str_with_elt_idx_and_col_p_in_view ( ref_view , ref_id_column , j , 0 ) ;
id_len = strlen ( id ) ;
// Grow ids array if needed
2019-07-06 16:31:19 +02:00
while ( ( best_match_ids_length + id_len + 1 ) > = best_match_ids_buffer_size )
2018-12-09 19:19:32 +01:00
{
best_match_ids_buffer_size = best_match_ids_buffer_size * 2 ;
best_match_ids = ( char * ) realloc ( best_match_ids , best_match_ids_buffer_size * sizeof ( char ) ) ;
if ( best_match_ids = = NULL )
{
obi_set_errno ( OBI_MALLOC_ERROR ) ;
obidebug ( 1 , " \n Error reallocating match ids array when assigning " ) ;
return - 1 ;
}
}
// Save match
best_match_array [ best_match_count ] = j ;
best_match_count + + ;
strcpy ( best_match_ids + best_match_ids_length , id ) ;
best_match_ids_length = best_match_ids_length + id_len + 1 ;
}
}
// Get LCA of the LCAs of the best matches
2019-07-23 19:02:17 +02:00
lca = NULL ;
lca_taxid = - 1 ;
2018-12-09 19:19:32 +01:00
for ( j = 0 ; j < best_match_count ; j + + )
{
best_match_idx = best_match_array [ j ] ;
// Find the LCA for the chosen threshold
score_array = obi_get_array_with_col_p_in_view ( ref_view , score_a_column , best_match_idx , & lca_array_length ) ;
k = 0 ;
2020-04-28 15:10:07 +02:00
while ( ( k < lca_array_length ) & & ( score_array [ k ] > = best_score ) )
2018-12-09 19:19:32 +01:00
k + + ;
if ( k > 0 )
{
lca_array = obi_get_array_with_col_p_in_view ( ref_view , lca_taxid_a_column , best_match_idx , & lca_array_length ) ;
if ( j > 0 )
{
2020-04-28 15:10:07 +02:00
// lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
// if (lca == NULL)
// {
// obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
// return -1;
// }
2018-12-09 19:19:32 +01:00
lca_in_array = obi_taxo_get_taxon_with_taxid ( taxonomy , lca_array [ k - 1 ] ) ;
if ( lca_in_array = = NULL )
{
obidebug ( 1 , " \n Error getting a taxon from a taxid when doing taxonomic assignment " ) ;
return - 1 ;
}
lca = obi_taxo_get_lca ( lca , lca_in_array ) ;
if ( lca = = NULL )
{
obidebug ( 1 , " \n Error getting the LCA of two taxa when doing taxonomic assignment " ) ;
return - 1 ;
}
lca_taxid = lca - > taxid ;
}
else
{
lca_taxid = lca_array [ k - 1 ] ;
lca = obi_taxo_get_taxon_with_taxid ( taxonomy , lca_taxid ) ;
}
}
else
{
2019-07-23 19:02:17 +02:00
taxid_best_match = obi_get_int_with_elt_idx_and_col_p_in_view ( ref_view , ref_taxid_column , best_match_idx , 0 ) ;
2018-12-09 19:19:32 +01:00
best_match = obi_taxo_get_taxon_with_taxid ( taxonomy , taxid_best_match ) ;
if ( best_match = = NULL )
{
2019-08-18 19:45:53 +02:00
obidebug ( 1 , " \n Error getting a taxon from a taxid (%d) when doing taxonomic assignment " , taxid_best_match ) ;
2018-12-09 19:19:32 +01:00
return - 1 ;
}
if ( j > 0 )
{
lca = obi_taxo_get_lca ( lca , best_match ) ;
lca_taxid = lca - > taxid ;
}
else
{
lca_taxid = taxid_best_match ;
lca = obi_taxo_get_taxon_with_taxid ( taxonomy , lca_taxid ) ;
}
}
}
2019-07-23 19:02:17 +02:00
lca_name = NULL ;
2018-12-09 19:19:32 +01:00
if ( best_match_count > 0 )
{
assigned = true ;
// Get LCA name
if ( lca - > preferred_name ! = NULL )
lca_name = lca - > preferred_name ;
else
lca_name = lca - > name ;
2019-07-06 16:31:19 +02:00
best_match_ids_to_store = best_match_ids ;
2018-12-09 19:19:32 +01:00
}
else
{
assigned = false ;
lca_name = OBIStr_NA ;
lca_taxid = OBIInt_NA ;
2019-07-06 16:31:19 +02:00
best_match_ids_to_store = OBITuple_NA ;
2018-12-09 19:19:32 +01:00
score = OBIFloat_NA ;
}
// Print result in output view
if ( print_assignment_result ( output_view , i ,
assigned_taxid_column , lca_taxid ,
assigned_name_column , lca_name ,
assigned_status_column , assigned ,
2019-07-06 16:31:19 +02:00
best_match_column , best_match_ids_to_store , best_match_ids_length ,
2018-12-09 19:19:32 +01:00
score_column , best_score
) < 0 )
return - 1 ;
}
free ( best_match_array ) ;
free ( best_match_ids ) ;
obi_close_taxonomy ( taxonomy ) ;
obi_save_and_close_view ( query_view ) ;
obi_save_and_close_view ( ref_view ) ;
obi_save_and_close_view ( output_view ) ;
obi_close_dms ( dms , false ) ;
obi_close_dms ( ref_dms , false ) ;
obi_close_dms ( taxo_dms , false ) ;
2019-09-03 21:46:39 +02:00
fprintf ( stderr , " \r Done : 100 %% \n " ) ;
2018-12-09 19:19:32 +01:00
return 0 ;
}