ecotag: Added list of taxids for all best matches (closes #80)
This commit is contained in:
@ -71,9 +71,12 @@ static int create_output_columns(Obiview_p o_view);
|
|||||||
* @param name The assigned scientific name.
|
* @param name The assigned scientific name.
|
||||||
* @param assigned_status_column A pointer on the column where the assigned status should be written.
|
* @param assigned_status_column A pointer on the column where the assigned status should be written.
|
||||||
* @param assigned The assigned status (whether the sequence was assigned to a taxon or not).
|
* @param assigned The assigned status (whether the sequence was assigned to a taxon or not).
|
||||||
* @param best_match_column A pointer on the column where the list of ids of the best matches should be written.
|
* @param best_match_ids_column A pointer on the column where the list of ids of the best matches should be written.
|
||||||
* @param best_match_ids The list of ids of the best matches as an array of the concatenated ids separated by '\0'.
|
* @param best_match_ids The list of ids of the best matches as an array of the concatenated ids separated by '\0'.
|
||||||
* @param best_match_ids_length The total length of the array of ids of best matches.
|
* @param best_match_ids_length The total length of the array of ids of best matches.
|
||||||
|
* @param best_match_taxids_column A pointer on the column where the list of taxids of the best matches should be written.
|
||||||
|
* @param best_match_taxids The list of taxids of the best matches as an array of the taxids.
|
||||||
|
* @param best_match_taxids_length The length of the array of taxids of best matches.
|
||||||
* @param score_column A pointer on the column where the score should be written.
|
* @param score_column A pointer on the column where the score should be written.
|
||||||
* @param score The similarity score of the sequence with its best match(es).
|
* @param score The similarity score of the sequence with its best match(es).
|
||||||
*
|
*
|
||||||
@ -87,7 +90,8 @@ int print_assignment_result(Obiview_p output_view, index_t line,
|
|||||||
OBIDMS_column_p assigned_taxid_column, int32_t taxid,
|
OBIDMS_column_p assigned_taxid_column, int32_t taxid,
|
||||||
OBIDMS_column_p assigned_name_column, const char* name,
|
OBIDMS_column_p assigned_name_column, const char* name,
|
||||||
OBIDMS_column_p assigned_status_column, bool assigned,
|
OBIDMS_column_p assigned_status_column, bool assigned,
|
||||||
OBIDMS_column_p best_match_column, const char* best_match_ids, int best_match_ids_length,
|
OBIDMS_column_p best_match_ids_column, const char* best_match_ids, int best_match_ids_length,
|
||||||
|
OBIDMS_column_p best_match_taxids_column, const int32_t* best_match_taxids, int best_match_taxids_length,
|
||||||
OBIDMS_column_p score_column, double score);
|
OBIDMS_column_p score_column, double score);
|
||||||
|
|
||||||
|
|
||||||
@ -130,7 +134,14 @@ static int create_output_columns(Obiview_p o_view)
|
|||||||
// Column for array of best match ids
|
// Column for array of best match ids
|
||||||
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
|
obidebug(1, "\nError creating the column for the array of ids of best matches in ecotag");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column for array of best match taxids
|
||||||
|
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nError creating the column for the array of taxids of best matches in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -142,7 +153,8 @@ int print_assignment_result(Obiview_p output_view, index_t line,
|
|||||||
OBIDMS_column_p assigned_taxid_column, int32_t taxid,
|
OBIDMS_column_p assigned_taxid_column, int32_t taxid,
|
||||||
OBIDMS_column_p assigned_name_column, const char* name,
|
OBIDMS_column_p assigned_name_column, const char* name,
|
||||||
OBIDMS_column_p assigned_status_column, bool assigned,
|
OBIDMS_column_p assigned_status_column, bool assigned,
|
||||||
OBIDMS_column_p best_match_column, const char* best_match_ids, int best_match_ids_length,
|
OBIDMS_column_p best_match_ids_column, const char* best_match_ids, int best_match_ids_length,
|
||||||
|
OBIDMS_column_p best_match_taxids_column, const int32_t* best_match_taxids, int best_match_taxids_length,
|
||||||
OBIDMS_column_p score_column, double score)
|
OBIDMS_column_p score_column, double score)
|
||||||
{
|
{
|
||||||
// Write the assigned taxid
|
// Write the assigned taxid
|
||||||
@ -167,9 +179,16 @@ int print_assignment_result(Obiview_p output_view, index_t line,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Write the best match ids
|
// Write the best match ids
|
||||||
if (obi_set_array_with_col_p_in_view(output_view, best_match_column, line, best_match_ids, (uint8_t)(sizeof(char)*8), best_match_ids_length) < 0)
|
if (obi_set_array_with_col_p_in_view(output_view, best_match_ids_column, line, best_match_ids, (uint8_t)(sizeof(char)*8), best_match_ids_length) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError writing a assignment status in a column when writing ecotag results");
|
obidebug(1, "\nError writing the array of best match ids in a column when writing ecotag results");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the best match taxids
|
||||||
|
if (obi_set_array_with_col_p_in_view(output_view, best_match_taxids_column, line, best_match_taxids, (uint8_t)(sizeof(OBI_INT)*8), best_match_taxids_length) < 0)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nError writing the array of best match taxids in a column when writing ecotag results");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,6 +254,8 @@ int obi_ecotag(const char* dms_name,
|
|||||||
char* best_match_ids;
|
char* best_match_ids;
|
||||||
char* best_match_ids_to_store;
|
char* best_match_ids_to_store;
|
||||||
int32_t best_match_ids_length;
|
int32_t best_match_ids_length;
|
||||||
|
int32_t* best_match_taxids;
|
||||||
|
int32_t* best_match_taxids_to_store;
|
||||||
int best_match_count;
|
int best_match_count;
|
||||||
int buffer_size;
|
int buffer_size;
|
||||||
int best_match_ids_buffer_size;
|
int best_match_ids_buffer_size;
|
||||||
@ -263,7 +284,8 @@ int obi_ecotag(const char* dms_name,
|
|||||||
OBIDMS_column_p assigned_taxid_column = NULL;
|
OBIDMS_column_p assigned_taxid_column = NULL;
|
||||||
OBIDMS_column_p assigned_name_column = NULL;
|
OBIDMS_column_p assigned_name_column = NULL;
|
||||||
OBIDMS_column_p assigned_status_column = NULL;
|
OBIDMS_column_p assigned_status_column = NULL;
|
||||||
OBIDMS_column_p best_match_column = NULL;
|
OBIDMS_column_p best_match_ids_column = NULL;
|
||||||
|
OBIDMS_column_p best_match_taxids_column = NULL;
|
||||||
OBIDMS_column_p lca_taxid_a_column = NULL;
|
OBIDMS_column_p lca_taxid_a_column = NULL;
|
||||||
OBIDMS_column_p score_a_column = NULL;
|
OBIDMS_column_p score_a_column = NULL;
|
||||||
OBIDMS_column_p ref_taxid_column = NULL;
|
OBIDMS_column_p ref_taxid_column = NULL;
|
||||||
@ -396,7 +418,8 @@ int obi_ecotag(const char* dms_name,
|
|||||||
assigned_taxid_column = obi_view_get_column(output_view, ECOTAG_TAXID_COLUMN_NAME);
|
assigned_taxid_column = obi_view_get_column(output_view, ECOTAG_TAXID_COLUMN_NAME);
|
||||||
assigned_name_column = obi_view_get_column(output_view, ECOTAG_NAME_COLUMN_NAME);
|
assigned_name_column = obi_view_get_column(output_view, ECOTAG_NAME_COLUMN_NAME);
|
||||||
assigned_status_column = obi_view_get_column(output_view, ECOTAG_STATUS_COLUMN_NAME);
|
assigned_status_column = obi_view_get_column(output_view, ECOTAG_STATUS_COLUMN_NAME);
|
||||||
best_match_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME);
|
best_match_ids_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME);
|
||||||
|
best_match_taxids_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME);
|
||||||
score_column = obi_view_get_column(output_view, ECOTAG_SCORE_COLUMN_NAME);
|
score_column = obi_view_get_column(output_view, ECOTAG_SCORE_COLUMN_NAME);
|
||||||
|
|
||||||
// Open the used reference columns
|
// Open the used reference columns
|
||||||
@ -453,6 +476,14 @@ int obi_ecotag(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
best_match_taxids = (int32_t*) malloc(buffer_size* sizeof(int32_t));
|
||||||
|
if (best_match_taxids == NULL)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
|
obidebug(1, "\nError allocating memory for the best match taxid array in ecotag");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
for (i=0; i < query_count; i++)
|
for (i=0; i < query_count; i++)
|
||||||
{
|
{
|
||||||
if (i%1000 == 0)
|
if (i%1000 == 0)
|
||||||
@ -514,7 +545,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
|
|
||||||
// Store in best match array
|
// Store in best match array
|
||||||
|
|
||||||
// Grow match array if needed
|
// Grow match and taxid array if needed
|
||||||
if (best_match_count == buffer_size)
|
if (best_match_count == buffer_size)
|
||||||
{
|
{
|
||||||
buffer_size = buffer_size*2;
|
buffer_size = buffer_size*2;
|
||||||
@ -525,6 +556,13 @@ int obi_ecotag(const char* dms_name,
|
|||||||
obidebug(1, "\nError reallocating match array when assigning");
|
obidebug(1, "\nError reallocating match array when assigning");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
best_match_taxids = (int32_t*) realloc(best_match_taxids, buffer_size*sizeof(int32_t));
|
||||||
|
if (best_match_taxids == NULL)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
|
obidebug(1, "\nError reallocating match taxids array when assigning");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
id = obi_get_str_with_elt_idx_and_col_p_in_view(ref_view, ref_id_column, j, 0);
|
id = obi_get_str_with_elt_idx_and_col_p_in_view(ref_view, ref_id_column, j, 0);
|
||||||
@ -545,6 +583,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
|
|
||||||
// Save match
|
// Save match
|
||||||
best_match_array[best_match_count] = j;
|
best_match_array[best_match_count] = j;
|
||||||
|
best_match_taxids[best_match_count] = obi_get_int_with_elt_idx_and_col_p_in_view(ref_view, ref_taxid_column, j, 0);
|
||||||
best_match_count++;
|
best_match_count++;
|
||||||
strcpy(best_match_ids+best_match_ids_length, id);
|
strcpy(best_match_ids+best_match_ids_length, id);
|
||||||
best_match_ids_length = best_match_ids_length + id_len + 1;
|
best_match_ids_length = best_match_ids_length + id_len + 1;
|
||||||
@ -629,6 +668,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
else
|
else
|
||||||
lca_name = lca->name;
|
lca_name = lca->name;
|
||||||
best_match_ids_to_store = best_match_ids;
|
best_match_ids_to_store = best_match_ids;
|
||||||
|
best_match_taxids_to_store = best_match_taxids;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -636,6 +676,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
lca_name = OBIStr_NA;
|
lca_name = OBIStr_NA;
|
||||||
lca_taxid = OBIInt_NA;
|
lca_taxid = OBIInt_NA;
|
||||||
best_match_ids_to_store = OBITuple_NA;
|
best_match_ids_to_store = OBITuple_NA;
|
||||||
|
best_match_taxids_to_store = OBITuple_NA;
|
||||||
score = OBIFloat_NA;
|
score = OBIFloat_NA;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -644,7 +685,8 @@ int obi_ecotag(const char* dms_name,
|
|||||||
assigned_taxid_column, lca_taxid,
|
assigned_taxid_column, lca_taxid,
|
||||||
assigned_name_column, lca_name,
|
assigned_name_column, lca_name,
|
||||||
assigned_status_column, assigned,
|
assigned_status_column, assigned,
|
||||||
best_match_column, best_match_ids_to_store, best_match_ids_length,
|
best_match_ids_column, best_match_ids_to_store, best_match_ids_length,
|
||||||
|
best_match_taxids_column, best_match_taxids_to_store, best_match_count,
|
||||||
score_column, best_score
|
score_column, best_score
|
||||||
) < 0)
|
) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
@ -652,6 +694,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
|
|
||||||
free(best_match_array);
|
free(best_match_array);
|
||||||
free(best_match_ids);
|
free(best_match_ids);
|
||||||
|
free(best_match_taxids);
|
||||||
|
|
||||||
obi_close_taxonomy(taxonomy);
|
obi_close_taxonomy(taxonomy);
|
||||||
obi_save_and_close_view(query_view);
|
obi_save_and_close_view(query_view);
|
||||||
|
@ -23,7 +23,8 @@
|
|||||||
#define ECOTAG_TAXID_COLUMN_NAME "TAXID"
|
#define ECOTAG_TAXID_COLUMN_NAME "TAXID"
|
||||||
#define ECOTAG_NAME_COLUMN_NAME "SCIENTIFIC_NAME"
|
#define ECOTAG_NAME_COLUMN_NAME "SCIENTIFIC_NAME"
|
||||||
#define ECOTAG_STATUS_COLUMN_NAME "ID_STATUS"
|
#define ECOTAG_STATUS_COLUMN_NAME "ID_STATUS"
|
||||||
#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH"
|
#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH_IDS"
|
||||||
|
#define ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME "BEST_MATCH_TAXIDS"
|
||||||
#define ECOTAG_SCORE_COLUMN_NAME "BEST_IDENTITY"
|
#define ECOTAG_SCORE_COLUMN_NAME "BEST_IDENTITY"
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user