Compare commits

..

1 Commit

Author SHA1 Message Date
cb53381863 ecotag: BEST_MATCH_TAXIDS now dereplicated (no repeated taxids in the
list) and switch to version 3.0.1b8
2021-05-10 16:02:06 +12:00
2 changed files with 30 additions and 5 deletions

View File

@@ -1,5 +1,5 @@
major = 3 major = 3
minor = 0 minor = 0
serial= '1b7' serial= '1b8'
version ="%d.%d.%s" % (major,minor,serial) version ="%d.%d.%s" % (major,minor,serial)

View File

@@ -233,7 +233,7 @@ int obi_ecotag(const char* dms_name,
// Write result (max score, threshold, LCA assigned, list of the ids of the best matches) // Write result (max score, threshold, LCA assigned, list of the ids of the best matches)
index_t i, j, k; index_t i, j, k, t;
ecotx_t* lca; ecotx_t* lca;
ecotx_t* lca_in_array; ecotx_t* lca_in_array;
ecotx_t* best_match; ecotx_t* best_match;
@@ -259,16 +259,20 @@ int obi_ecotag(const char* dms_name,
int32_t* best_match_taxids; int32_t* best_match_taxids;
int32_t* best_match_taxids_to_store; int32_t* best_match_taxids_to_store;
int best_match_count; int best_match_count;
int best_match_taxid_count;
int buffer_size; int buffer_size;
int best_match_ids_buffer_size; int best_match_ids_buffer_size;
index_t best_match_idx; index_t best_match_idx;
int32_t lca_array_length; int32_t lca_array_length;
int32_t lca_taxid; int32_t lca_taxid;
int32_t taxid_best_match; int32_t taxid_best_match;
int32_t taxid;
int32_t taxid_to_store;
bool assigned; bool assigned;
const char* lca_name; const char* lca_name;
const char* id; const char* id;
int id_len; int id_len;
bool already_in;
OBIDMS_p dms = NULL; OBIDMS_p dms = NULL;
OBIDMS_p ref_dms = NULL; OBIDMS_p ref_dms = NULL;
@@ -488,10 +492,11 @@ int obi_ecotag(const char* dms_name,
for (i=0; i < query_count; i++) for (i=0; i < query_count; i++)
{ {
if (i%1000 == 0) if (i%10 == 0)
fprintf(stderr,"\rDone : %f %% ", (i / (float) query_count)*100); fprintf(stderr,"\rDone : %f %% ", (i / (float) query_count)*100);
best_match_count = 0; best_match_count = 0;
best_match_taxid_count = 0;
best_match_ids_length = 0; best_match_ids_length = 0;
threshold = ecotag_threshold; threshold = ecotag_threshold;
best_score = 0.0; best_score = 0.0;
@@ -543,6 +548,7 @@ int obi_ecotag(const char* dms_name,
// Reset the array with that match // Reset the array with that match
best_match_ids_length = 0; best_match_ids_length = 0;
best_match_count = 0; best_match_count = 0;
best_match_taxid_count = 0;
} }
// Store in best match array // Store in best match array
@@ -585,8 +591,27 @@ int obi_ecotag(const char* dms_name,
// Save match // Save match
best_match_array[best_match_count] = j; best_match_array[best_match_count] = j;
best_match_taxids[best_match_count] = obi_get_int_with_elt_idx_and_col_p_in_view(ref_view, ref_taxid_column, j, 0);
best_match_count++; best_match_count++;
// Save best match taxid only if not already in array
taxid_to_store = obi_get_int_with_elt_idx_and_col_p_in_view(ref_view, ref_taxid_column, j, 0);
already_in = false;
for (t=0; t<best_match_taxid_count; t++)
{
taxid = best_match_taxids[t];
//fprintf(stderr, "\ntaxid %d, taxid_to_store %d\n", taxid, taxid_to_store);
if (taxid == taxid_to_store)
{
already_in = true;
break;
}
}
if (! already_in)
{
best_match_taxids[best_match_taxid_count] = taxid_to_store;
best_match_taxid_count++;
}
strcpy(best_match_ids+best_match_ids_length, id); strcpy(best_match_ids+best_match_ids_length, id);
best_match_ids_length = best_match_ids_length + id_len + 1; best_match_ids_length = best_match_ids_length + id_len + 1;
} }
@@ -693,7 +718,7 @@ int obi_ecotag(const char* dms_name,
assigned_name_column, lca_name, assigned_name_column, lca_name,
assigned_status_column, assigned, assigned_status_column, assigned,
best_match_ids_column, best_match_ids_to_store, best_match_ids_length, best_match_ids_column, best_match_ids_to_store, best_match_ids_length,
best_match_taxids_column, best_match_taxids_to_store, best_match_count, best_match_taxids_column, best_match_taxids_to_store, best_match_taxid_count,
score_column, best_score score_column, best_score
) < 0) ) < 0)
return -1; return -1;