ecotag: added separate threshold for minimum circle identity (and switch
to version 3.0.0b35)
This commit is contained in:
@ -218,7 +218,8 @@ int obi_ecotag(const char* dms_name,
|
||||
const char* taxonomy_name,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
double ecotag_threshold) // TODO different threshold for the similarity sphere around ref seqs
|
||||
double ecotag_threshold,
|
||||
double bubble_threshold)
|
||||
{
|
||||
|
||||
// For each sequence
|
||||
@ -239,6 +240,7 @@ int obi_ecotag(const char* dms_name,
|
||||
index_t query_seq_idx, ref_seq_idx;
|
||||
double score, best_score;
|
||||
double threshold;
|
||||
double lca_threshold;
|
||||
int lcs_length;
|
||||
int ali_length;
|
||||
Kmer_table_p ktable;
|
||||
@ -389,10 +391,10 @@ int obi_ecotag(const char* dms_name,
|
||||
return -1;
|
||||
}
|
||||
free(db_threshold_str);
|
||||
if (ecotag_threshold < db_threshold)
|
||||
if (bubble_threshold < db_threshold)
|
||||
{
|
||||
fprintf(stderr, "\nError: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).\n\n",
|
||||
ecotag_threshold, db_threshold);
|
||||
bubble_threshold, db_threshold);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -597,11 +599,16 @@ int obi_ecotag(const char* dms_name,
|
||||
{
|
||||
best_match_idx = best_match_array[j];
|
||||
|
||||
// Find the LCA for the chosen threshold
|
||||
// Find the LCA for the higher of best_score and the chosen bubble threshold
|
||||
score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
|
||||
|
||||
if (bubble_threshold < best_score)
|
||||
lca_threshold = best_score;
|
||||
else
|
||||
lca_threshold = bubble_threshold;
|
||||
|
||||
k = 0;
|
||||
while ((k < lca_array_length) && (score_array[k] >= best_score))
|
||||
while ((k < lca_array_length) && (score_array[k] >= lca_threshold))
|
||||
k++;
|
||||
|
||||
if (k>0)
|
||||
|
@ -42,12 +42,14 @@
|
||||
* @param output_view_name The name to give to the output view.
|
||||
* @param output_view_comments The comments to associate to the output view.
|
||||
* @param ecotag_threshold The threshold at which to assign.
|
||||
* @param bubble_threshold The threshold at which to look for an LCA (i.e. minimum identity considered for the assignment circle);
|
||||
* the threshold actually used is the higher of this value and the best assignment score found.
|
||||
*
|
||||
* The algorithm works like this:
|
||||
* For each query sequence:
|
||||
* Align with reference database
|
||||
* Keep the indices of all the best matches
|
||||
* For each kept index, get the LCA at that threshold as stored in the reference database, then the LCA of those LCAs
|
||||
* For each kept index, get the LCA (as stored in the reference database) at the higher of bubble_threshold and the best assignment score found, then take the LCA of those LCAs
|
||||
* Write result (max score, threshold, taxid and scientific name of the LCA assigned, list of the ids of the best matches)
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
@ -65,7 +67,8 @@ int obi_ecotag(const char* dms_name,
|
||||
const char* taxonomy_name,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
double ecotag_threshold);
|
||||
double ecotag_threshold,
|
||||
double bubble_threshold);
|
||||
|
||||
|
||||
#endif /* OBI_ECOTAG_H_ */
|
||||
|
Reference in New Issue
Block a user