Sequence alignment: added the possibility to specify the index of the
sequence to align within each line of a column containing multiple sequences per line (C level for now)
This commit is contained in:
@ -581,7 +581,7 @@ cdef class OBIView_NUC_SEQS(OBIView):
|
||||
id1_col_p = id1_col_pp[0]
|
||||
id2_col_p = id2_col_pp[0]
|
||||
|
||||
if obi_align_one_column(iview1_p, icol1_p, oview_p, id1_col_p, id2_col_p, ocol_p, threshold, normalize, reference, similarity_mode) < 0 :
|
||||
if obi_align_one_column(iview1_p, icol1_p, NULL, oview_p, id1_col_p, id2_col_p, ocol_p, threshold, normalize, reference, similarity_mode) < 0 :
|
||||
raise Exception("Error aligning sequences")
|
||||
|
||||
|
||||
|
@ -8,6 +8,7 @@ cdef extern from "obi_align.h" nogil:
|
||||
|
||||
int obi_align_one_column(Obiview_p seq_view,
|
||||
OBIDMS_column_p seq_column,
|
||||
const char* seq_name,
|
||||
Obiview_p score_view,
|
||||
OBIDMS_column_p id1_column,
|
||||
OBIDMS_column_p id2_column,
|
||||
|
@ -33,7 +33,7 @@
|
||||
// what's with multiple sequences/line columns?
|
||||
|
||||
|
||||
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, const char* seq_name,
|
||||
Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode)
|
||||
{
|
||||
@ -47,6 +47,7 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
Obi_blob_p blob1;
|
||||
Obi_blob_p blob2;
|
||||
int lcs_min;
|
||||
index_t seq_idx;
|
||||
|
||||
k = 0;
|
||||
|
||||
@ -65,8 +66,21 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Get element index from element name to compute it only once
|
||||
if (seq_name != NULL)
|
||||
{
|
||||
seq_idx = obi_column_get_element_index_from_name(seq_column, seq_name);
|
||||
if (seq_idx == OBIIdx_NA)
|
||||
{
|
||||
obidebug(1, "\nError getting the sequence index in a column line when aligning");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
seq_idx = 0;
|
||||
|
||||
// Build kmer tables
|
||||
ktable = hash_seq_column(seq_view, seq_column);
|
||||
ktable = hash_seq_column(seq_view, seq_column, seq_idx);
|
||||
if (ktable == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_ALIGN_ERROR);
|
||||
@ -86,8 +100,8 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
|
||||
for (j=i+1; j < seq_count; j++)
|
||||
{
|
||||
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0);
|
||||
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0);
|
||||
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, seq_idx);
|
||||
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, seq_idx);
|
||||
|
||||
if ((blob1 == NULL) || (blob2 == NULL))
|
||||
{
|
||||
@ -96,7 +110,7 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
}
|
||||
|
||||
// Check if the sequences are identical in a quick way (same index in the same indexer)
|
||||
if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0))
|
||||
if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, seq_idx) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, seq_idx))
|
||||
{
|
||||
if (similarity_mode && normalize)
|
||||
score = 1.0;
|
||||
@ -120,7 +134,7 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
{ // Print result
|
||||
|
||||
// Get sequence ids
|
||||
id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);
|
||||
id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); // TODO Could there be multiple IDs per line?
|
||||
id2 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
|
||||
|
||||
// Write sequence ids in output view
|
||||
|
@ -49,7 +49,7 @@
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, const char* seq_name,
|
||||
Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode);
|
||||
|
||||
|
@ -232,7 +232,7 @@ int thresholdLCS4(int32_t reflen, int32_t lcs)
|
||||
}
|
||||
|
||||
|
||||
Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col)
|
||||
Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t seq_idx)
|
||||
{
|
||||
size_t i;
|
||||
size_t seq_count;
|
||||
@ -251,7 +251,7 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col)
|
||||
|
||||
for (i=0; i < seq_count; i++)
|
||||
{
|
||||
seq = obi_get_seq_with_elt_idx_and_col_p_in_view(view, seq_col, i, 0); // TODO discuss 1 element per line mandatory
|
||||
seq = obi_get_seq_with_elt_idx_and_col_p_in_view(view, seq_col, i, seq_idx); // TODO discuss 1 element per line mandatory
|
||||
if (seq == NULL)
|
||||
return NULL; // TODO or not
|
||||
ktable[i].table = malloc(256 * sizeof(unsigned char));
|
||||
|
@ -18,7 +18,7 @@ typedef struct {
|
||||
} Kmer_table_t, *Kmer_table_p;
|
||||
|
||||
|
||||
Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col);
|
||||
Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t seq_idx);
|
||||
void align_filters(Kmer_table_p ktable, Obi_blob_p seq1, Obi_blob_p seq2, index_t idx1, index_t idx2, double threshold, bool normalize, int reference, bool similarity_mode, double* score, int* LCSmin, bool can_be_identical);
|
||||
void free_kmer_tables(Kmer_table_p ktable, size_t count);
|
||||
|
||||
|
Reference in New Issue
Block a user