ecotag: Added list of taxids for all best matches (closes #80)
This commit is contained in:
@ -71,9 +71,12 @@ static int create_output_columns(Obiview_p o_view);
|
||||
* @param name The assigned scientific name.
|
||||
* @param assigned_status_column A pointer on the column where the assigned status should be written.
|
||||
* @param assigned The assigned status (whether the sequence was assigned to a taxon or not).
|
||||
* @param best_match_column A pointer on the column where the list of ids of the best matches should be written.
|
||||
* @param best_match_ids_column A pointer on the column where the list of ids of the best matches should be written.
|
||||
* @param best_match_ids The list of ids of the best matches as an array of the concatenated ids separated by '\0'.
|
||||
* @param best_match_ids_length The total length of the array of ids of best matches.
|
||||
* @param best_match_taxids_column A pointer on the column where the list of taxids of the best matches should be written.
|
||||
* @param best_match_taxids The taxids of the best matches, as an array of int32_t values.
|
||||
* @param best_match_taxids_length The number of taxids in the array of taxids of the best matches.
|
||||
* @param score_column A pointer on the column where the score should be written.
|
||||
* @param score The similarity score of the sequence with its best match(es).
|
||||
*
|
||||
@ -87,7 +90,8 @@ int print_assignment_result(Obiview_p output_view, index_t line,
|
||||
OBIDMS_column_p assigned_taxid_column, int32_t taxid,
|
||||
OBIDMS_column_p assigned_name_column, const char* name,
|
||||
OBIDMS_column_p assigned_status_column, bool assigned,
|
||||
OBIDMS_column_p best_match_column, const char* best_match_ids, int best_match_ids_length,
|
||||
OBIDMS_column_p best_match_ids_column, const char* best_match_ids, int best_match_ids_length,
|
||||
OBIDMS_column_p best_match_taxids_column, const int32_t* best_match_taxids, int best_match_taxids_length,
|
||||
OBIDMS_column_p score_column, double score);
|
||||
|
||||
|
||||
@ -130,7 +134,14 @@ static int create_output_columns(Obiview_p o_view)
|
||||
// Column for array of best match ids
|
||||
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
|
||||
obidebug(1, "\nError creating the column for the array of ids of best matches in ecotag");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Column for array of best match taxids
|
||||
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the column for the array of taxids of best matches in ecotag");
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -142,7 +153,8 @@ int print_assignment_result(Obiview_p output_view, index_t line,
|
||||
OBIDMS_column_p assigned_taxid_column, int32_t taxid,
|
||||
OBIDMS_column_p assigned_name_column, const char* name,
|
||||
OBIDMS_column_p assigned_status_column, bool assigned,
|
||||
OBIDMS_column_p best_match_column, const char* best_match_ids, int best_match_ids_length,
|
||||
OBIDMS_column_p best_match_ids_column, const char* best_match_ids, int best_match_ids_length,
|
||||
OBIDMS_column_p best_match_taxids_column, const int32_t* best_match_taxids, int best_match_taxids_length,
|
||||
OBIDMS_column_p score_column, double score)
|
||||
{
|
||||
// Write the assigned taxid
|
||||
@ -167,9 +179,16 @@ int print_assignment_result(Obiview_p output_view, index_t line,
|
||||
}
|
||||
|
||||
// Write the best match ids
|
||||
if (obi_set_array_with_col_p_in_view(output_view, best_match_column, line, best_match_ids, (uint8_t)(sizeof(char)*8), best_match_ids_length) < 0)
|
||||
if (obi_set_array_with_col_p_in_view(output_view, best_match_ids_column, line, best_match_ids, (uint8_t)(sizeof(char)*8), best_match_ids_length) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing a assignment status in a column when writing ecotag results");
|
||||
obidebug(1, "\nError writing the array of best match ids in a column when writing ecotag results");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write the best match taxids
|
||||
if (obi_set_array_with_col_p_in_view(output_view, best_match_taxids_column, line, best_match_taxids, (uint8_t)(sizeof(OBI_INT)*8), best_match_taxids_length) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing the array of best match taxids in a column when writing ecotag results");
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -235,6 +254,8 @@ int obi_ecotag(const char* dms_name,
|
||||
char* best_match_ids;
|
||||
char* best_match_ids_to_store;
|
||||
int32_t best_match_ids_length;
|
||||
int32_t* best_match_taxids;
|
||||
int32_t* best_match_taxids_to_store;
|
||||
int best_match_count;
|
||||
int buffer_size;
|
||||
int best_match_ids_buffer_size;
|
||||
@ -263,7 +284,8 @@ int obi_ecotag(const char* dms_name,
|
||||
OBIDMS_column_p assigned_taxid_column = NULL;
|
||||
OBIDMS_column_p assigned_name_column = NULL;
|
||||
OBIDMS_column_p assigned_status_column = NULL;
|
||||
OBIDMS_column_p best_match_column = NULL;
|
||||
OBIDMS_column_p best_match_ids_column = NULL;
|
||||
OBIDMS_column_p best_match_taxids_column = NULL;
|
||||
OBIDMS_column_p lca_taxid_a_column = NULL;
|
||||
OBIDMS_column_p score_a_column = NULL;
|
||||
OBIDMS_column_p ref_taxid_column = NULL;
|
||||
@ -396,7 +418,8 @@ int obi_ecotag(const char* dms_name,
|
||||
assigned_taxid_column = obi_view_get_column(output_view, ECOTAG_TAXID_COLUMN_NAME);
|
||||
assigned_name_column = obi_view_get_column(output_view, ECOTAG_NAME_COLUMN_NAME);
|
||||
assigned_status_column = obi_view_get_column(output_view, ECOTAG_STATUS_COLUMN_NAME);
|
||||
best_match_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME);
|
||||
best_match_ids_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME);
|
||||
best_match_taxids_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME);
|
||||
score_column = obi_view_get_column(output_view, ECOTAG_SCORE_COLUMN_NAME);
|
||||
|
||||
// Open the used reference columns
|
||||
@ -453,6 +476,14 @@ int obi_ecotag(const char* dms_name,
|
||||
return -1;
|
||||
}
|
||||
|
||||
best_match_taxids = (int32_t*) malloc(buffer_size* sizeof(int32_t));
|
||||
if (best_match_taxids == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError allocating memory for the best match taxid array in ecotag");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i=0; i < query_count; i++)
|
||||
{
|
||||
if (i%1000 == 0)
|
||||
@ -514,7 +545,7 @@ int obi_ecotag(const char* dms_name,
|
||||
|
||||
// Store in best match array
|
||||
|
||||
// Grow match array if needed
|
||||
// Grow the match and taxid arrays if needed
|
||||
if (best_match_count == buffer_size)
|
||||
{
|
||||
buffer_size = buffer_size*2;
|
||||
@ -525,6 +556,13 @@ int obi_ecotag(const char* dms_name,
|
||||
obidebug(1, "\nError reallocating match array when assigning");
|
||||
return -1;
|
||||
}
|
||||
best_match_taxids = (int32_t*) realloc(best_match_taxids, buffer_size*sizeof(int32_t));
|
||||
if (best_match_taxids == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError reallocating match taxids array when assigning");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
id = obi_get_str_with_elt_idx_and_col_p_in_view(ref_view, ref_id_column, j, 0);
|
||||
@ -545,6 +583,7 @@ int obi_ecotag(const char* dms_name,
|
||||
|
||||
// Save match
|
||||
best_match_array[best_match_count] = j;
|
||||
best_match_taxids[best_match_count] = obi_get_int_with_elt_idx_and_col_p_in_view(ref_view, ref_taxid_column, j, 0);
|
||||
best_match_count++;
|
||||
strcpy(best_match_ids+best_match_ids_length, id);
|
||||
best_match_ids_length = best_match_ids_length + id_len + 1;
|
||||
@ -629,6 +668,7 @@ int obi_ecotag(const char* dms_name,
|
||||
else
|
||||
lca_name = lca->name;
|
||||
best_match_ids_to_store = best_match_ids;
|
||||
best_match_taxids_to_store = best_match_taxids;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -636,6 +676,7 @@ int obi_ecotag(const char* dms_name,
|
||||
lca_name = OBIStr_NA;
|
||||
lca_taxid = OBIInt_NA;
|
||||
best_match_ids_to_store = OBITuple_NA;
|
||||
best_match_taxids_to_store = OBITuple_NA;
|
||||
score = OBIFloat_NA;
|
||||
}
|
||||
|
||||
@ -644,7 +685,8 @@ int obi_ecotag(const char* dms_name,
|
||||
assigned_taxid_column, lca_taxid,
|
||||
assigned_name_column, lca_name,
|
||||
assigned_status_column, assigned,
|
||||
best_match_column, best_match_ids_to_store, best_match_ids_length,
|
||||
best_match_ids_column, best_match_ids_to_store, best_match_ids_length,
|
||||
best_match_taxids_column, best_match_taxids_to_store, best_match_count,
|
||||
score_column, best_score
|
||||
) < 0)
|
||||
return -1;
|
||||
@ -652,6 +694,7 @@ int obi_ecotag(const char* dms_name,
|
||||
|
||||
free(best_match_array);
|
||||
free(best_match_ids);
|
||||
free(best_match_taxids);
|
||||
|
||||
obi_close_taxonomy(taxonomy);
|
||||
obi_save_and_close_view(query_view);
|
||||
|
@ -23,7 +23,8 @@
|
||||
#define ECOTAG_TAXID_COLUMN_NAME "TAXID"
|
||||
#define ECOTAG_NAME_COLUMN_NAME "SCIENTIFIC_NAME"
|
||||
#define ECOTAG_STATUS_COLUMN_NAME "ID_STATUS"
|
||||
#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH"
|
||||
#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH_IDS"
|
||||
#define ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME "BEST_MATCH_TAXIDS"
|
||||
#define ECOTAG_SCORE_COLUMN_NAME "BEST_IDENTITY"
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user