Added the kmer filter to LCS alignments; obiblobs containing
encoded sequences are now put directly into int16_t arrays for the alignment.
This commit is contained in:
108
src/obi_align.c
108
src/obi_align.c
@ -19,6 +19,8 @@
|
||||
#include "obitypes.h"
|
||||
#include "obiview.h"
|
||||
#include "sse_banded_LCS_alignment.h"
|
||||
#include "upperband.h"
|
||||
#include "obiblob.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
@ -29,21 +31,22 @@
|
||||
// option to write with stdint types?
|
||||
// check NUC_SEQS view type? and score type (int or float if normalize)
|
||||
// what's with multiple sequences/line columns?
|
||||
// make function that put blobs in int16
|
||||
|
||||
|
||||
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode)
|
||||
{
|
||||
index_t i, j, k;
|
||||
index_t seq_count;
|
||||
char* seq1;
|
||||
char* seq2;
|
||||
const char* id1;
|
||||
const char* id2;
|
||||
double score;
|
||||
index_t i, j, k;
|
||||
index_t seq_count;
|
||||
const char* id1;
|
||||
const char* id2;
|
||||
double score;
|
||||
OBIDMS_column_p id_column;
|
||||
Kmer_table_p ktable;
|
||||
Obi_blob_p blob1;
|
||||
Obi_blob_p blob2;
|
||||
int lcs_min;
|
||||
|
||||
k = 0;
|
||||
|
||||
@ -62,6 +65,15 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Build kmer tables
|
||||
ktable = hash_seq_column(seq_view, seq_column);
|
||||
if (ktable == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_ALIGN_ERROR);
|
||||
obidebug(1, "\nError building kmer tables before aligning");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Get the ID column pointer
|
||||
id_column = obi_view_get_column(seq_view, ID_COLUMN);
|
||||
|
||||
@ -69,66 +81,72 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||
|
||||
for (i=0; i < (seq_count - 1); i++)
|
||||
{
|
||||
if (i%100 == 0)
|
||||
fprintf(stderr,"\rDone : %f %% ", (i / (float) seq_count)*100);
|
||||
|
||||
for (j=i+1; j < seq_count; j++)
|
||||
{
|
||||
//fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
|
||||
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0);
|
||||
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0);
|
||||
|
||||
seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0);
|
||||
seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0);
|
||||
|
||||
if ((seq1 == NULL) || (seq2 == NULL))
|
||||
if ((blob1 == NULL) || (blob2 == NULL))
|
||||
{
|
||||
obidebug(1, "\nError retrieving sequences to align");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO kmer filter
|
||||
// kmer filter
|
||||
align_filters(ktable, blob1, blob2, i, j, threshold, normalize, reference, similarity_mode, &score, &lcs_min);
|
||||
|
||||
// Compute alignment score
|
||||
score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
|
||||
if ((threshold == 0) || (score == -1.0)) // no threshold or filter passed, and sequences not identical: align
|
||||
score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode);
|
||||
|
||||
// Get sequence ids
|
||||
id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);
|
||||
id2 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
|
||||
if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
|
||||
{ // Print result
|
||||
|
||||
// Write sequence ids in output view
|
||||
if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id1_column, k, 0, id1) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing id1 in a column");
|
||||
return -1;
|
||||
}
|
||||
// Get sequence ids
|
||||
id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);
|
||||
id2 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
|
||||
|
||||
if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id2_column, k, 0, id2) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing id2 in a column");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write score in output view
|
||||
if (normalize)
|
||||
{
|
||||
if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
|
||||
// Write sequence ids in output view
|
||||
if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id1_column, k, 0, id1) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing alignment score in a column");
|
||||
obidebug(1, "\nError writing id1 in a column");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
|
||||
|
||||
if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id2_column, k, 0, id2) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing alignment score in a column");
|
||||
obidebug(1, "\nError writing id2 in a column");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write score in output view
|
||||
if (normalize)
|
||||
{
|
||||
if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing alignment score in a column");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing alignment score in a column");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
k++;
|
||||
}
|
||||
|
||||
free(seq1);
|
||||
free(seq2);
|
||||
|
||||
k++;
|
||||
}
|
||||
}
|
||||
|
||||
free_kmer_tables(ktable, seq_count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user