ecotag: BEST_MATCH_TAXIDS now dereplicated (no repeated taxids in the

list) and switch to version 3.0.1b8
This commit is contained in:
mercierc
2021-05-10 16:02:06 +12:00
parent 72b3e5d872
commit cb53381863
2 changed files with 30 additions and 5 deletions

View File

@ -1,5 +1,5 @@
major = 3
minor = 0
serial= '1b7'
serial= '1b8'
version ="%d.%d.%s" % (major,minor,serial)

View File

@ -233,7 +233,7 @@ int obi_ecotag(const char* dms_name,
// Write result (max score, threshold, LCA assigned, list of the ids of the best matches)
index_t i, j, k;
index_t i, j, k, t;
ecotx_t* lca;
ecotx_t* lca_in_array;
ecotx_t* best_match;
@ -259,16 +259,20 @@ int obi_ecotag(const char* dms_name,
int32_t* best_match_taxids;
int32_t* best_match_taxids_to_store;
int best_match_count;
int best_match_taxid_count;
int buffer_size;
int best_match_ids_buffer_size;
index_t best_match_idx;
int32_t lca_array_length;
int32_t lca_taxid;
int32_t taxid_best_match;
int32_t taxid;
int32_t taxid_to_store;
bool assigned;
const char* lca_name;
const char* id;
int id_len;
bool already_in;
OBIDMS_p dms = NULL;
OBIDMS_p ref_dms = NULL;
@ -488,10 +492,11 @@ int obi_ecotag(const char* dms_name,
for (i=0; i < query_count; i++)
{
if (i%1000 == 0)
if (i%10 == 0)
fprintf(stderr,"\rDone : %f %% ", (i / (float) query_count)*100);
best_match_count = 0;
best_match_taxid_count = 0;
best_match_ids_length = 0;
threshold = ecotag_threshold;
best_score = 0.0;
@ -543,6 +548,7 @@ int obi_ecotag(const char* dms_name,
// Reset the array with that match
best_match_ids_length = 0;
best_match_count = 0;
best_match_taxid_count = 0;
}
// Store in best match array
@ -585,8 +591,27 @@ int obi_ecotag(const char* dms_name,
// Save match
best_match_array[best_match_count] = j;
best_match_taxids[best_match_count] = obi_get_int_with_elt_idx_and_col_p_in_view(ref_view, ref_taxid_column, j, 0);
best_match_count++;
// Save best match taxid only if not already in array
taxid_to_store = obi_get_int_with_elt_idx_and_col_p_in_view(ref_view, ref_taxid_column, j, 0);
already_in = false;
for (t=0; t<best_match_taxid_count; t++)
{
taxid = best_match_taxids[t];
//fprintf(stderr, "\ntaxid %d, taxid_to_store %d\n", taxid, taxid_to_store);
if (taxid == taxid_to_store)
{
already_in = true;
break;
}
}
if (! already_in)
{
best_match_taxids[best_match_taxid_count] = taxid_to_store;
best_match_taxid_count++;
}
strcpy(best_match_ids+best_match_ids_length, id);
best_match_ids_length = best_match_ids_length + id_len + 1;
}
@ -693,7 +718,7 @@ int obi_ecotag(const char* dms_name,
assigned_name_column, lca_name,
assigned_status_column, assigned,
best_match_ids_column, best_match_ids_to_store, best_match_ids_length,
best_match_taxids_column, best_match_taxids_to_store, best_match_count,
best_match_taxids_column, best_match_taxids_to_store, best_match_taxid_count,
score_column, best_score
) < 0)
return -1;