698 lines
23 KiB
C
Executable File
698 lines
23 KiB
C
Executable File
/***********************************************************************************
|
|
* Functions to build reference databases for the taxonomic assignment of sequences *
|
|
***********************************************************************************/
|
|
|
|
/**
|
|
* @file build_reference_db.c
|
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
|
* @date November 15th 2018
|
|
* @brief Functions to build referece databases for the taxonomic assignment of sequences.
|
|
*/
|
|
|
|
//#define OMP_SUPPORT // TODO
|
|
#ifdef OMP_SUPPORT
|
|
#include <omp.h>
|
|
#endif
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <stdbool.h>
|
|
#include <search.h>
|
|
#include <sys/time.h>
|
|
|
|
#include "build_reference_db.h"
|
|
#include "obidms.h"
|
|
#include "obidebug.h"
|
|
#include "obierrno.h"
|
|
#include "obitypes.h"
|
|
#include "obiview.h"
|
|
#include "obi_lcs.h"
|
|
#include "obidms_taxonomy.h"
|
|
#include "obidmscolumn_array.h"
|
|
|
|
|
|
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
|
|
|
|
|
/**************************************************************************
|
|
*
|
|
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
|
|
*
|
|
**************************************************************************/
|
|
|
|
|
|
|
|
/************************************************************************
|
|
*
|
|
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
|
*
|
|
************************************************************************/
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
*
|
|
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
|
|
*
|
|
**********************************************************************/
|
|
|
|
|
|
int build_reference_db(const char* dms_name,
|
|
const char* refs_view_name,
|
|
const char* taxonomy_name,
|
|
const char* o_view_name,
|
|
const char* o_view_comments,
|
|
double threshold)
|
|
{
|
|
OBIDMS_p dms = NULL;
|
|
OBIDMS_taxonomy_p tax = NULL;
|
|
char* matrix_view_name = NULL; // TODO discuss that it must be unique
|
|
char* matrix_with_lca_view_name = NULL; // TODO discuss that it must be unique
|
|
Obiview_p matrix_with_lca_view = NULL;
|
|
Obiview_p refs_view = NULL;
|
|
Obiview_p o_view = NULL;
|
|
OBIDMS_column_p matrix_idx1_column = NULL;
|
|
OBIDMS_column_p matrix_idx2_column = NULL;
|
|
OBIDMS_column_p refs_taxid_column = NULL;
|
|
OBIDMS_column_p matrix_lca_taxid_column = NULL;
|
|
OBIDMS_column_p matrix_score_column = NULL;
|
|
OBIDMS_column_p final_lca_taxid_a_column = NULL;
|
|
OBIDMS_column_p final_score_a_column = NULL;
|
|
obiint_t taxid1, taxid2, taxid_lca;
|
|
ecotx_t* taxon1 = NULL;
|
|
ecotx_t* taxon2 = NULL;
|
|
ecotx_t* lca = NULL;
|
|
index_t idx1, idx2;
|
|
index_t i, j, k;
|
|
int32_t taxid_array_length;
|
|
int32_t score_array_length;
|
|
int32_t taxid_array_writable_length;
|
|
int32_t score_array_writable_length;
|
|
obifloat_t score;
|
|
obiint_t* lca_taxid_array;
|
|
obifloat_t* score_array;
|
|
obiint_t lca_taxid_array_writable[1000];
|
|
obifloat_t score_array_writable[1000];
|
|
bool modified;
|
|
|
|
// Discuss keeping the matrix view or not
|
|
matrix_view_name = calloc((strlen(o_view_name)+strlen("_matrix")+1), sizeof(char));
|
|
if (matrix_view_name == NULL)
|
|
{
|
|
obidebug(1, "\nError allocating memory for the name of the matrix view when building a reference database");
|
|
return -1;
|
|
}
|
|
// TODO check and create view name that doesn't already exist
|
|
matrix_view_name = strcpy(matrix_view_name, o_view_name);
|
|
strcat(matrix_view_name, "_matrix");
|
|
|
|
if (obi_lcs_align_one_column(dms_name,
|
|
refs_view_name,
|
|
"",
|
|
"",
|
|
"",
|
|
matrix_view_name,
|
|
"{}", // TODO proper comments?
|
|
false, false,
|
|
threshold, true, 0, true,
|
|
1) < 0)
|
|
{
|
|
obidebug(1, "\nError aligning the sequences when building a reference database");
|
|
}
|
|
|
|
// Add a column to the matrix view for LCAs
|
|
|
|
// Clone the view with a new name
|
|
|
|
// Build the view name
|
|
matrix_with_lca_view_name = calloc((strlen(o_view_name)+strlen("_matrix_with_lca")+1), sizeof(char));
|
|
if (matrix_with_lca_view_name == NULL)
|
|
{
|
|
obidebug(1, "\nError allocating memory for the name of the matrix view with LCA when building a reference database");
|
|
return -1;
|
|
}
|
|
matrix_with_lca_view_name = strcpy(matrix_with_lca_view_name, o_view_name);
|
|
strcat(matrix_with_lca_view_name, "_matrix_with_lca");
|
|
|
|
// Clone the matrix view
|
|
// Open the DMS
|
|
dms = obi_open_dms(dms_name);
|
|
if (dms == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the DMS when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Clone the view
|
|
matrix_with_lca_view = obi_clone_view_from_name(dms, matrix_view_name, matrix_with_lca_view_name, NULL, "{}");
|
|
if (matrix_with_lca_view == NULL)
|
|
{
|
|
obidebug(1, "\nError creating the matrix with LCA view when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Add the LCA taxid column
|
|
if (obi_view_add_column(matrix_with_lca_view,
|
|
LCA_TAXID_COLUMN_NAME,
|
|
-1,
|
|
LCA_TAXID_COLUMN_NAME,
|
|
OBI_INT,
|
|
-1,
|
|
1,
|
|
"",
|
|
false,
|
|
false,
|
|
false,
|
|
"",
|
|
"",
|
|
-1,
|
|
"{}",
|
|
true)
|
|
< 0)
|
|
{
|
|
obidebug(1, "\nError adding the LCA column to the matrix with LCA view when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Open the taxonomy
|
|
tax = obi_read_taxonomy(dms, taxonomy_name, true);
|
|
if (tax == NULL)
|
|
{
|
|
obidebug(1, "\nError reading the taxonomy when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Open the reference sequences view
|
|
refs_view = obi_open_view(dms, refs_view_name);
|
|
if (refs_view == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the reference sequences view when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Save column pointers
|
|
matrix_idx1_column = obi_view_get_column(matrix_with_lca_view, IDX1_COLUMN_NAME);
|
|
if (matrix_idx1_column == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the first index column when building a reference database");
|
|
return -1;
|
|
}
|
|
matrix_idx2_column = obi_view_get_column(matrix_with_lca_view, IDX2_COLUMN_NAME);
|
|
if (matrix_idx2_column == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the second index column when building a reference database");
|
|
return -1;
|
|
}
|
|
refs_taxid_column = obi_view_get_column(refs_view, TAXID_COLUMN);
|
|
if (refs_taxid_column == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the taxid column when building a reference database");
|
|
return -1;
|
|
}
|
|
matrix_lca_taxid_column = obi_view_get_column(matrix_with_lca_view, LCA_TAXID_COLUMN_NAME);
|
|
if (matrix_lca_taxid_column == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the LCA column when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Compute all the LCAs
|
|
// For each pair
|
|
for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
|
|
{
|
|
// Read taxid1 and get taxon1
|
|
// Read line index
|
|
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
|
taxid1 = obi_get_int_with_elt_idx_and_col_p_in_view(refs_view, refs_taxid_column, idx1, 0);
|
|
if (taxid1 == OBIInt_NA)
|
|
{
|
|
obidebug(1, "\nError getting a taxid when building a reference database, seq %lld in refs view has no taxid associated", idx1);
|
|
return -1;
|
|
}
|
|
taxon1 = obi_taxo_get_taxon_with_taxid(tax, taxid1);
|
|
if (taxon1 == NULL)
|
|
{
|
|
obidebug(1, "\nError getting a taxon with taxid %d when building a reference database, seq %lld in refs view", taxid1, idx1);
|
|
return -1;
|
|
}
|
|
|
|
// Read taxid2 and get taxon2
|
|
idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
|
|
taxid2 = obi_get_int_with_elt_idx_and_col_p_in_view(refs_view, refs_taxid_column, idx2, 0);
|
|
if (taxid2 == OBIInt_NA)
|
|
{
|
|
obidebug(1, "\nError getting a taxid when building a reference database, seq %lld in refs view has no taxid associated", idx1);
|
|
return -1;
|
|
}
|
|
taxon2 = obi_taxo_get_taxon_with_taxid(tax, taxid2);
|
|
if (taxon2 == NULL)
|
|
{
|
|
obidebug(1, "\nError getting a taxon with taxid %d when building a reference database, seq %lld in refs view", taxid2, idx2);
|
|
return -1;
|
|
}
|
|
|
|
// Compute and write their LCA
|
|
lca = obi_taxo_get_lca(taxon1, taxon2);
|
|
if (lca == NULL)
|
|
{
|
|
obidebug(1, "\nError getting the last common ancestor of two taxa when building a reference database");
|
|
return -1;
|
|
}
|
|
taxid_lca = lca->taxid;
|
|
if (obi_set_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_lca_taxid_column, i, 0, taxid_lca) < 0)
|
|
{
|
|
obidebug(1, "\nError writing the last common ancestor of two taxa when building a reference database");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// Clone refs view, add 2 arrays columns for lca and score, compute and write them
|
|
|
|
// Clone refs view
|
|
o_view = obi_clone_view(dms, refs_view, o_view_name, NULL, o_view_comments);
|
|
if (o_view == NULL)
|
|
{
|
|
obidebug(1, "\nError cloning the view of references when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Add the LCA taxid array column
|
|
if (obi_view_add_column(o_view,
|
|
LCA_TAXID_ARRAY_COLUMN_NAME,
|
|
-1,
|
|
LCA_TAXID_ARRAY_COLUMN_NAME,
|
|
OBI_INT,
|
|
-1,
|
|
1,
|
|
"",
|
|
false,
|
|
true,
|
|
false,
|
|
"",
|
|
"",
|
|
-1,
|
|
"{}",
|
|
true)
|
|
< 0)
|
|
{
|
|
obidebug(1, "\nError adding the LCA taxid column to the final view when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Add the score array column
|
|
if (obi_view_add_column(o_view,
|
|
LCA_SCORE_ARRAY_COLUMN_NAME,
|
|
-1,
|
|
LCA_SCORE_ARRAY_COLUMN_NAME,
|
|
OBI_FLOAT,
|
|
-1,
|
|
1,
|
|
"",
|
|
false,
|
|
true,
|
|
false,
|
|
"",
|
|
"",
|
|
-1,
|
|
"{}",
|
|
true)
|
|
< 0)
|
|
{
|
|
obidebug(1, "\nError adding the score column to the final view when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Open the newly added columns
|
|
final_lca_taxid_a_column = obi_view_get_column(o_view, LCA_TAXID_ARRAY_COLUMN_NAME);
|
|
if (final_lca_taxid_a_column == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the LCA taxid array column when building a reference database");
|
|
return -1;
|
|
}
|
|
final_score_a_column = obi_view_get_column(o_view, LCA_SCORE_ARRAY_COLUMN_NAME);
|
|
if (final_score_a_column == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the score array column when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Open alignment score column
|
|
matrix_score_column = obi_view_get_column(matrix_with_lca_view, SCORE_COLUMN_NAME);
|
|
if (matrix_score_column == NULL)
|
|
{
|
|
obidebug(1, "\nError opening the alignment score column when building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
|
|
// Going through matrix once, filling refs arrays on the go for efficiency
|
|
for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
|
|
{
|
|
// Read ref line indexes
|
|
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
|
idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
|
|
// Read LCA taxid
|
|
taxid_lca = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_lca_taxid_column, i, 0);
|
|
// Get LCA taxon
|
|
lca = obi_taxo_get_taxon_with_taxid(tax, taxid_lca);
|
|
if (lca == NULL)
|
|
{
|
|
obidebug(1, "\nError getting a LCA from taxid when building a reference database, taxid %d", taxid_lca);
|
|
return -1;
|
|
}
|
|
// Read alignment score
|
|
score = obi_get_float_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_score_column, i, 0);
|
|
|
|
///////////////// Compute for first sequence \\\\\\\\\\\\\\\\\\\\\\\ (TODO function)
|
|
|
|
// Read arrays
|
|
lca_taxid_array = (obiint_t*) obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, &taxid_array_length);
|
|
score_array = (obifloat_t*) obi_get_array_with_col_p_in_view(o_view, final_score_a_column, idx1, &score_array_length);
|
|
|
|
taxid_array_writable_length = taxid_array_length;
|
|
score_array_writable_length = score_array_length;
|
|
|
|
// Check that lengths are equal (TODO eventually remove?)
|
|
if (taxid_array_length != score_array_length)
|
|
{
|
|
obidebug(1, "\nError building a reference database: LCA taxid and score arrays' lengths are not equal (%d and %d)", taxid_array_length, score_array_length);
|
|
return -1;
|
|
}
|
|
|
|
// If empty, add values
|
|
if (taxid_array_length == 0)
|
|
{
|
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
|
{
|
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
|
return -1;
|
|
}
|
|
if (obi_set_array_with_col_p_in_view(o_view, final_score_a_column, idx1, &score, (uint8_t) (obi_sizeof(OBI_FLOAT) * 8), 1) < 0)
|
|
{
|
|
obidebug(1, "\nError setting a score array in a column when building a reference database");
|
|
return -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
j = 0;
|
|
modified = false;
|
|
while (j < taxid_array_length)
|
|
{
|
|
if (taxid_lca == lca_taxid_array[j]) // Same LCA taxid: replace if the similarity score is greater
|
|
{
|
|
if (score > score_array[j])
|
|
{
|
|
// Copy in array to make writable
|
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
|
modified = true;
|
|
|
|
// Better score for the same LCA, replace this LCA/score pair
|
|
lca_taxid_array_writable[j] = taxid_lca;
|
|
score_array_writable[j] = score;
|
|
|
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
|
{
|
|
for (k=j-1; k<taxid_array_writable_length-1; k++)
|
|
{
|
|
lca_taxid_array_writable[k] = lca_taxid_array_writable[k+1];
|
|
score_array_writable[k] = score_array_writable[k+1];
|
|
}
|
|
taxid_array_writable_length--;
|
|
score_array_writable_length--;
|
|
j--;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
else if (obi_taxo_is_taxon_under_taxid(lca, lca_taxid_array[j])) // Array LCA is a parent LCA
|
|
{
|
|
if (score > score_array[j])
|
|
{
|
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
|
modified = true;
|
|
|
|
// Insert new LCA/score pair
|
|
for (k=taxid_array_writable_length; k>=j+1; k--)
|
|
{
|
|
lca_taxid_array_writable[k] = lca_taxid_array_writable[k-1];
|
|
score_array_writable[k] = score_array_writable[k-1];
|
|
}
|
|
|
|
taxid_array_writable_length++;
|
|
score_array_writable_length++;
|
|
|
|
lca_taxid_array_writable[j] = taxid_lca;
|
|
score_array_writable[j] = score;
|
|
|
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
|
{
|
|
for (k=j-1; k<taxid_array_writable_length-1; k++)
|
|
{
|
|
lca_taxid_array_writable[k] = lca_taxid_array_writable[k+1];
|
|
score_array_writable[k] = score_array_writable[k+1];
|
|
}
|
|
taxid_array_writable_length--;
|
|
score_array_writable_length--;
|
|
j--;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
j++;
|
|
}
|
|
|
|
if (j == taxid_array_length) // same or parent LCA not found, need to be appended at the end
|
|
{
|
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
|
modified = true;
|
|
|
|
// Append LCA
|
|
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
|
score_array_writable[score_array_writable_length] = score;
|
|
|
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
|
{
|
|
for (k=j-1; k<taxid_array_writable_length-1; k++)
|
|
{
|
|
lca_taxid_array_writable[k] = lca_taxid_array_writable[k+1];
|
|
score_array_writable[k] = score_array_writable[k+1];
|
|
}
|
|
taxid_array_writable_length--;
|
|
score_array_writable_length--;
|
|
j--;
|
|
}
|
|
}
|
|
|
|
// Write new arrays
|
|
if (modified)
|
|
{
|
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, lca_taxid_array_writable, (uint8_t) (obi_sizeof(OBI_INT) * 8), taxid_array_writable_length) < 0)
|
|
{
|
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
|
return -1;
|
|
}
|
|
if (obi_set_array_with_col_p_in_view(o_view, final_score_a_column, idx1, score_array_writable, (uint8_t) (obi_sizeof(OBI_FLOAT) * 8), score_array_writable_length) < 0)
|
|
{
|
|
obidebug(1, "\nError setting a score array in a column when building a reference database");
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
///////////////// Compute for second sequence \\\\\\\\\\\\\\\\\\\\\\\ (TODO function)
|
|
|
|
// Read arrays
|
|
lca_taxid_array = (obiint_t*) obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, &taxid_array_length);
|
|
score_array = (obifloat_t*) obi_get_array_with_col_p_in_view(o_view, final_score_a_column, idx2, &score_array_length);
|
|
|
|
taxid_array_writable_length = taxid_array_length;
|
|
score_array_writable_length = score_array_length;
|
|
|
|
// Check that lengths are equal (TODO eventually remove?)
|
|
if (taxid_array_length != score_array_length)
|
|
{
|
|
obidebug(1, "\nError building a reference database: LCA taxid and score arrays' lengths are not equal (%d and %d)", taxid_array_length, score_array_length);
|
|
return -1;
|
|
}
|
|
|
|
// If empty, add values
|
|
if (taxid_array_length == 0)
|
|
{
|
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
|
{
|
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
|
return -1;
|
|
}
|
|
if (obi_set_array_with_col_p_in_view(o_view, final_score_a_column, idx2, &score, (uint8_t) (obi_sizeof(OBI_FLOAT) * 8), 1) < 0)
|
|
{
|
|
obidebug(1, "\nError setting a score array in a column when building a reference database");
|
|
return -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
j = 0;
|
|
modified = false;
|
|
while (j < taxid_array_length)
|
|
{
|
|
if (taxid_lca == lca_taxid_array[j]) // Same LCA taxid: replace if the similarity score is greater
|
|
{
|
|
if (score > score_array[j])
|
|
{
|
|
// Copy in array to make writable
|
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
|
modified = true;
|
|
|
|
// Better score for the same LCA, replace this LCA/score pair
|
|
lca_taxid_array_writable[j] = taxid_lca;
|
|
score_array_writable[j] = score;
|
|
|
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
|
{
|
|
for (k=j-1; k<taxid_array_writable_length-1; k++)
|
|
{
|
|
lca_taxid_array_writable[k] = lca_taxid_array_writable[k+1];
|
|
score_array_writable[k] = score_array_writable[k+1];
|
|
}
|
|
taxid_array_writable_length--;
|
|
score_array_writable_length--;
|
|
j--;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
else if (obi_taxo_is_taxon_under_taxid(lca, lca_taxid_array[j])) // Array LCA is a parent LCA
|
|
{
|
|
if (score > score_array[j])
|
|
{
|
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
|
modified = true;
|
|
|
|
// Insert new LCA/score pair
|
|
for (k=taxid_array_writable_length; k>=j+1; k--)
|
|
{
|
|
lca_taxid_array_writable[k] = lca_taxid_array_writable[k-1];
|
|
score_array_writable[k] = score_array_writable[k-1];
|
|
}
|
|
|
|
taxid_array_writable_length++;
|
|
score_array_writable_length++;
|
|
|
|
lca_taxid_array_writable[j] = taxid_lca;
|
|
score_array_writable[j] = score;
|
|
|
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
|
{
|
|
for (k=j-1; k<taxid_array_writable_length-1; k++)
|
|
{
|
|
lca_taxid_array_writable[k] = lca_taxid_array_writable[k+1];
|
|
score_array_writable[k] = score_array_writable[k+1];
|
|
}
|
|
taxid_array_writable_length--;
|
|
score_array_writable_length--;
|
|
j--;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
j++;
|
|
}
|
|
|
|
if (j == taxid_array_length) // same or parent LCA not found, need to be appended at the end
|
|
{
|
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
|
modified = true;
|
|
|
|
// Append LCA
|
|
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
|
score_array_writable[score_array_writable_length] = score;
|
|
|
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
|
{
|
|
for (k=j-1; k<taxid_array_writable_length-1; k++)
|
|
{
|
|
lca_taxid_array_writable[k] = lca_taxid_array_writable[k+1];
|
|
score_array_writable[k] = score_array_writable[k+1];
|
|
}
|
|
taxid_array_writable_length--;
|
|
score_array_writable_length--;
|
|
j--;
|
|
}
|
|
}
|
|
|
|
// Write new arrays
|
|
// Copy arrays for modification (can't edit in place, as it is stored in indexer data file)
|
|
if (modified)
|
|
{
|
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, lca_taxid_array_writable, (uint8_t) (obi_sizeof(OBI_INT) * 8), taxid_array_writable_length) < 0)
|
|
{
|
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
|
return -1;
|
|
}
|
|
if (obi_set_array_with_col_p_in_view(o_view, final_score_a_column, idx2, score_array_writable, (uint8_t) (obi_sizeof(OBI_FLOAT) * 8), score_array_writable_length) < 0)
|
|
{
|
|
obidebug(1, "\nError setting a score array in a column when building a reference database");
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Close views and DMS
|
|
if (obi_save_and_close_view(refs_view) < 0)
|
|
{
|
|
obidebug(1, "\nError closing the reference view after building a reference database");
|
|
return -1;
|
|
}
|
|
if (obi_save_and_close_view(matrix_with_lca_view) < 0)
|
|
{
|
|
obidebug(1, "\nError closing the matrix with LCA view after building a reference database");
|
|
return -1;
|
|
}
|
|
if (obi_save_and_close_view(o_view) < 0)
|
|
{
|
|
obidebug(1, "\nError closing the final view after building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Delete temporary views
|
|
if (obi_delete_view(dms, matrix_view_name) < 0)
|
|
{
|
|
obidebug(1, "\nError deleting temporary view %s after building a reference database", matrix_view_name);
|
|
return -1;
|
|
}
|
|
if (obi_delete_view(dms, matrix_with_lca_view_name) < 0)
|
|
{
|
|
obidebug(1, "\nError deleting temporary view %s after building a reference database", matrix_view_name);
|
|
return -1;
|
|
}
|
|
|
|
// Close DMS
|
|
if (obi_close_dms(dms, false) < 0)
|
|
{
|
|
obidebug(1, "\nError closing the DMS after building a reference database");
|
|
return -1;
|
|
}
|
|
|
|
// Free everything
|
|
free(matrix_view_name);
|
|
free(matrix_with_lca_view_name);
|
|
|
|
fprintf(stderr,"\rDone : 100 %% ");
|
|
return 0;
|
|
}
|
|
|