C: build_ref_db: added progress display for each step
This commit is contained in:
@ -157,7 +157,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
ecotx_t* lca_2 = NULL;
|
ecotx_t* lca_2 = NULL;
|
||||||
ecotx_t* lca = NULL;
|
ecotx_t* lca = NULL;
|
||||||
index_t idx1, idx2;
|
index_t idx1, idx2;
|
||||||
index_t i, j, k;
|
index_t i, j, k, count;
|
||||||
int32_t taxid_array_length;
|
int32_t taxid_array_length;
|
||||||
int32_t score_array_length;
|
int32_t score_array_length;
|
||||||
int32_t taxid_array_writable_length;
|
int32_t taxid_array_writable_length;
|
||||||
@ -185,6 +185,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
matrix_view_name = strcpy(matrix_view_name, o_view_name);
|
matrix_view_name = strcpy(matrix_view_name, o_view_name);
|
||||||
strcat(matrix_view_name, "_matrix");
|
strcat(matrix_view_name, "_matrix");
|
||||||
|
|
||||||
|
fprintf(stderr, "Aligning queries with reference database...\n");
|
||||||
if (obi_lcs_align_one_column(dms_name,
|
if (obi_lcs_align_one_column(dms_name,
|
||||||
refs_view_name,
|
refs_view_name,
|
||||||
"",
|
"",
|
||||||
@ -320,13 +321,19 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
count = (matrix_with_lca_view->infos)->line_count;
|
||||||
|
fprintf(stderr, "Computing LCAs...\n");
|
||||||
|
|
||||||
// Compute all the LCAs
|
// Compute all the LCAs
|
||||||
// For each pair
|
// For each pair
|
||||||
for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
if (! keep_running)
|
if (! keep_running)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
// Read all taxids associated with the first sequence and compute their LCA
|
// Read all taxids associated with the first sequence and compute their LCA
|
||||||
// Read line index
|
// Read line index
|
||||||
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
||||||
@ -363,6 +370,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
// Clone refs view, add 2 array columns for lca and score, compute and write them
|
// Clone refs view, add 2 array columns for lca and score, compute and write them
|
||||||
|
|
||||||
@ -442,13 +450,18 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "Building LCA arrays...\n");
|
||||||
|
|
||||||
// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
|
// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
|
||||||
// Going through matrix once, filling refs arrays on the go for efficiency
|
// Going through matrix once, filling refs arrays on the go for efficiency
|
||||||
for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
if (! keep_running)
|
if (! keep_running)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
// Read ref line indexes
|
// Read ref line indexes
|
||||||
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
||||||
idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
|
idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
|
||||||
@ -769,11 +782,17 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
|
fprintf(stderr, "Writing results...\n");
|
||||||
|
count = (o_view->infos)->line_count;
|
||||||
// Fill empty LCA information (because filling from potentially sparse alignment matrix) with the sequence taxid
|
// Fill empty LCA information (because filling from potentially sparse alignment matrix) with the sequence taxid
|
||||||
score=1.0; // technically getting LCA of identical sequences
|
score=1.0; // technically getting LCA of identical sequences
|
||||||
for (i=0; i<(o_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, i, &taxid_array_length);
|
obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, i, &taxid_array_length);
|
||||||
if (taxid_array_length == 0) // no LCA set
|
if (taxid_array_length == 0) // no LCA set
|
||||||
{
|
{
|
||||||
@ -799,6 +818,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
// Add information about the threshold used to build the DB
|
// Add information about the threshold used to build the DB
|
||||||
snprintf(threshold_str, 5, "%f", threshold);
|
snprintf(threshold_str, 5, "%f", threshold);
|
||||||
@ -858,7 +878,6 @@ int build_reference_db(const char* dms_name,
|
|||||||
free(matrix_view_name);
|
free(matrix_view_name);
|
||||||
free(matrix_with_lca_view_name);
|
free(matrix_with_lca_view_name);
|
||||||
|
|
||||||
fprintf(stderr,"\rDone : 100 %% \n");
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user