C: ecotag: now prints a warning if the requested threshold is lower than the db threshold
This commit is contained in:
Celine Mercier
2019-08-31 18:30:06 +02:00
parent 0ee728c4d0
commit 073d98db08

View File

@ -32,7 +32,7 @@
#include "upperband.h" #include "upperband.h"
#include "obiblob.h" #include "obiblob.h"
#include "build_reference_db.h" #include "build_reference_db.h"
#include "libjson/json_utils.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
@ -198,7 +198,7 @@ int obi_ecotag(const char* dms_name,
const char* taxonomy_name, const char* taxonomy_name,
const char* output_view_name, const char* output_view_name,
const char* output_view_comments, const char* output_view_comments,
double ecotag_threshold) double ecotag_threshold) // TODO different threshold for the similarity sphere around ref seqs
{ {
// For each sequence // For each sequence
@ -267,6 +267,9 @@ int obi_ecotag(const char* dms_name,
OBIDMS_column_p score_a_column = NULL; OBIDMS_column_p score_a_column = NULL;
OBIDMS_column_p ref_taxid_column = NULL; OBIDMS_column_p ref_taxid_column = NULL;
char* db_threshold_str = NULL;
double db_threshold;
buffer_size = 1024; buffer_size = 1024;
best_match_ids_buffer_size = 1024; best_match_ids_buffer_size = 1024;
@ -333,7 +336,7 @@ int obi_ecotag(const char* dms_name,
return -1; return -1;
} }
// Open the column of reference sequences to assign // Open the column of reference sequences to compare the query sequences to
if (strcmp((ref_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0) if (strcmp((ref_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
ref_seq_column = obi_view_get_column(ref_view, NUC_SEQUENCE_COLUMN); ref_seq_column = obi_view_get_column(ref_view, NUC_SEQUENCE_COLUMN);
else else
@ -348,6 +351,25 @@ int obi_ecotag(const char* dms_name,
return -1; return -1;
} }
// Check if the demanded threshold is lower than the threshold used to build the reference database
db_threshold_str = obi_read_comment((ref_view->infos)->comments, DB_THRESHOLD_KEY_IN_COMMENTS);
if (db_threshold_str == NULL)
{
obidebug(1, "\nError reading the threshold used to build the reference database.");
return -1;
}
if (sscanf(db_threshold_str+1, "%lf", &db_threshold) <= 0)
{
obidebug(1, "\nError reading the threshold used to build the reference database.");
return -1;
}
free(db_threshold_str);
if (ecotag_threshold < db_threshold)
fprintf(stderr, "\nWarning: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f). "
"\n\tMeaning that the similarity *between reference sequences* below the ref db threshold will not be considered. "
"\n\tEcotag normally uses that similarity for better results but it works fine without it.\n\n",
ecotag_threshold, db_threshold);
// Open the ID column of reference sequences // Open the ID column of reference sequences
ref_id_column = obi_view_get_column(ref_view, ID_COLUMN); ref_id_column = obi_view_get_column(ref_view, ID_COLUMN);
if (ref_id_column == NULL) if (ref_id_column == NULL)