Sequence alignment: added the possibility to specify the index of the sequences to align in a column containing multiple sequences per line (at the C level for now)
This commit is contained in:
Celine Mercier
2016-11-29 16:15:02 +01:00
parent 5fb025f310
commit 98d0849653
6 changed files with 26 additions and 11 deletions

View File

@ -33,7 +33,7 @@
// TODO: how should columns containing multiple sequences per line be handled?
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, const char* seq_name,
Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
double threshold, bool normalize, int reference, bool similarity_mode)
{
@ -47,6 +47,7 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
Obi_blob_p blob1;
Obi_blob_p blob2;
int lcs_min;
index_t seq_idx;
k = 0;
@ -65,8 +66,21 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
return -1;
}
// Get element index from element name to compute it only once
if (seq_name != NULL)
{
seq_idx = obi_column_get_element_index_from_name(seq_column, seq_name);
if (seq_idx == OBIIdx_NA)
{
obidebug(1, "\nError getting the sequence index in a column line when aligning");
return -1;
}
}
else
seq_idx = 0;
// Build kmer tables
ktable = hash_seq_column(seq_view, seq_column);
ktable = hash_seq_column(seq_view, seq_column, seq_idx);
if (ktable == NULL)
{
obi_set_errno(OBI_ALIGN_ERROR);
@ -86,8 +100,8 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
for (j=i+1; j < seq_count; j++)
{
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0);
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0);
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, seq_idx);
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, seq_idx);
if ((blob1 == NULL) || (blob2 == NULL))
{
@ -96,7 +110,7 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
}
// Check if the sequences are identical in a quick way (same index in the same indexer)
if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0))
if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, seq_idx) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, seq_idx))
{
if (similarity_mode && normalize)
score = 1.0;
@ -120,7 +134,7 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
{ // Print result
// Get sequence ids
id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);
id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); // TODO Could there be multiple IDs per line?
id2 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
// Write sequence ids in output view