diff --git a/python/obitools3/commands/ecotag.pyx b/python/obitools3/commands/ecotag.pyx index d916087..4c747ab 100755 --- a/python/obitools3/commands/ecotag.pyx +++ b/python/obitools3/commands/ecotag.pyx @@ -41,6 +41,17 @@ def addOptions(parser): help="Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. " "Default: 0.00 (no threshold).") + group.add_argument('--minimum-circle','-c', + action="store", dest="ecotag:bubble_threshold", + metavar='', + default=0.99, + type=float, + help="Minimum identity considered for the assignment circle " + "(sequence is assigned to the LCA of all sequences within a similarity circle of the best matches; " + "the threshold for this circle is the highest value between and the best assignment score found). " + "Give value as a normalized identity, e.g. 0.95 for an identity of 95%%. " + "Default: 0.99.") + def run(config): DMS.obi_atexit() @@ -66,9 +77,8 @@ def run(config): ref_view_name = ref[1] # Check that the threshold demanded is greater than or equal to the threshold used to build the reference database - if config['ecotag']['threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) : - print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).", - config['ecotag']['threshold'], ref_dms[ref_view_name].comments["ref_db_threshold"]) + if config['ecotag']['bubble_threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) : + raise Exception(f"Error: The threshold demanded ({config['ecotag']['bubble_threshold']}) is lower than the threshold used to build the reference database ({float(ref_dms[ref_view_name].comments['ref_db_threshold'])}).") # Open the output: only the DMS output = open_uri(config['obi']['outputURI'], @@ -113,8 +123,9 @@ def run(config): if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \ ref_dms.name_with_full_path, tobytes(ref_view_name), \ taxo_dms.name_with_full_path, tobytes(taxonomy_name), \ - tobytes(o_view_name), comments, - config['ecotag']['threshold']) < 0: + tobytes(o_view_name), comments, \ + config['ecotag']['threshold'], \ + config['ecotag']['bubble_threshold']) < 0: raise Exception("Error running ecotag") # If the input and output DMS are not the same, export result view to output DMS diff --git a/python/obitools3/dms/capi/obiecotag.pxd b/python/obitools3/dms/capi/obiecotag.pxd index c3d112c..e1030e0 100755 --- a/python/obitools3/dms/capi/obiecotag.pxd +++ b/python/obitools3/dms/capi/obiecotag.pxd @@ -11,4 +11,5 @@ cdef extern from "obi_ecotag.h" nogil: const char* taxonomy_name, const char* output_view_name, const char* output_view_comments, - double ecotag_threshold) + double ecotag_threshold, + double bubble_threshold) diff --git a/python/obitools3/version.py b/python/obitools3/version.py index 40155f4..af6476f 100755 --- a/python/obitools3/version.py +++ b/python/obitools3/version.py @@ -1,5 +1,5 @@ major = 3 minor = 0 -serial= '0b34' +serial= '0b35' version ="%d.%d.%s" % (major,minor,serial) diff --git a/src/obi_ecotag.c b/src/obi_ecotag.c index df69aee..3773479 100755 --- a/src/obi_ecotag.c +++ b/src/obi_ecotag.c @@ -218,7 +218,8 @@ int obi_ecotag(const char* dms_name, const char* taxonomy_name, const char* output_view_name, const char* output_view_comments, - double ecotag_threshold) // TODO different threshold for the similarity sphere around ref seqs + double ecotag_threshold, + double bubble_threshold) { // For each sequence @@ -239,6 +240,7 @@ int obi_ecotag(const char* dms_name, index_t query_seq_idx, ref_seq_idx; double score, best_score; double threshold; + double lca_threshold; int lcs_length; int ali_length; Kmer_table_p ktable; @@ -389,10 +391,10 @@ int obi_ecotag(const char* dms_name, return -1; } free(db_threshold_str); - if (ecotag_threshold < db_threshold) + if (bubble_threshold < db_threshold) { fprintf(stderr, "\nError: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).\n\n", - ecotag_threshold, db_threshold); + bubble_threshold, db_threshold); return -1; } @@ -597,11 +599,16 @@ int obi_ecotag(const char* dms_name, { best_match_idx = best_match_array[j]; - // Find the LCA for the chosen threshold + // Find the LCA for the highest threshold between best_score and the chosen bubble threshold score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length); + if (bubble_threshold < best_score) + lca_threshold = best_score; + else + lca_threshold = bubble_threshold; + k = 0; - while ((k < lca_array_length) && (score_array[k] >= best_score)) + while ((k < lca_array_length) && (score_array[k] >= lca_threshold)) k++; if (k>0) diff --git a/src/obi_ecotag.h b/src/obi_ecotag.h index ed88194..000eb87 100755 --- a/src/obi_ecotag.h +++ b/src/obi_ecotag.h @@ -42,12 +42,14 @@ * @param output_view_name The name to give to the output view. * @param output_view_comments The comments to associate to the output view. * @param ecotag_threshold The threshold at which to assign. + * @param bubble_threshold The threshold at which to look for an LCA (i.e. minimum identity considered for the assignment circle); + * the threshold actually used will be the highest between this value and the best assignment score found. * * The algorithm works like this: * For each query sequence: * Align with reference database * Keep the indices of all the best matches - * For each kept index, get the LCA at that threshold as stored in the reference database, then the LCA of those LCAs + * For each kept index, get the LCA at the highest threshold between bubble_threshold and the best assignment score found (as stored in the reference database), then the LCA of those LCAs * Write result (max score, threshold, taxid and scientific name of the LCA assigned, list of the ids of the best matches) * * @returns A value indicating the success of the operation. @@ -65,7 +67,8 @@ int obi_ecotag(const char* dms_name, const char* taxonomy_name, const char* output_view_name, const char* output_view_comments, - double ecotag_threshold); + double ecotag_threshold, + double bubble_threshold); #endif /* OBI_ECOTAG_H_ */