From 073d98db087484107954f03477f280b9a3073fac Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Sat, 31 Aug 2019 18:30:06 +0200 Subject: [PATCH] C: ecotag: now prints a warning if the demanded threshold is lower than the db threshold --- src/obi_ecotag.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/obi_ecotag.c b/src/obi_ecotag.c index 97a0c41..47c4aa7 100755 --- a/src/obi_ecotag.c +++ b/src/obi_ecotag.c @@ -32,7 +32,7 @@ #include "upperband.h" #include "obiblob.h" #include "build_reference_db.h" - +#include "libjson/json_utils.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) @@ -198,7 +198,7 @@ int obi_ecotag(const char* dms_name, const char* taxonomy_name, const char* output_view_name, const char* output_view_comments, - double ecotag_threshold) + double ecotag_threshold) // TODO different threshold for the similarity sphere around ref seqs { // For each sequence @@ -267,6 +267,9 @@ int obi_ecotag(const char* dms_name, OBIDMS_column_p score_a_column = NULL; OBIDMS_column_p ref_taxid_column = NULL; + char* db_threshold_str = NULL; + double db_threshold; + buffer_size = 1024; best_match_ids_buffer_size = 1024; @@ -333,7 +336,7 @@ int obi_ecotag(const char* dms_name, return -1; } - // Open the column of reference sequences to assign + // Open the column of reference sequences to compare the query sequences to if (strcmp((ref_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0) ref_seq_column = obi_view_get_column(ref_view, NUC_SEQUENCE_COLUMN); else @@ -348,6 +351,25 @@ int obi_ecotag(const char* dms_name, return -1; } + // Check if the demanded threshold is lower than the threshold used to build the reference database + db_threshold_str = obi_read_comment((ref_view->infos)->comments, DB_THRESHOLD_KEY_IN_COMMENTS); + if (db_threshold_str == NULL) + { + obidebug(1, "\nError reading the threshold used to build the reference database."); + return -1; + } + if (sscanf(db_threshold_str+1, "%lf", &db_threshold) <= 0) + { + obidebug(1, "\nError reading the threshold used to build the reference database."); + return -1; + } + free(db_threshold_str); + if (ecotag_threshold < db_threshold) + fprintf(stderr, "\nWarning: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f). " + "\n\tMeaning that the similarity *between reference sequences* below the ref db threshold will not be considered. " + "\n\tEcotag normally uses that similarity for better results but it works fine without it.\n\n", + ecotag_threshold, db_threshold); + // Open the ID column of reference sequences ref_id_column = obi_view_get_column(ref_view, ID_COLUMN); if (ref_id_column == NULL)