215 lines
5.8 KiB
C
215 lines
5.8 KiB
C
/****************************************************************************
|
|
* Sequence alignment functions *
|
|
****************************************************************************/
|
|
|
|
/**
|
|
* @file obi_align.c
|
|
* @author Celine Mercier
|
|
* @date May 4th 2016
|
|
* @brief Functions handling sequence alignments.
|
|
*/
|
|
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <stdbool.h>
|
|
|
|
#include "obidebug.h"
|
|
#include "obierrno.h"
|
|
#include "obitypes.h"
|
|
#include "obiview.h"
|
|
#include "sse_banded_LCS_alignment.h"
|
|
|
|
|
|
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
|
|
|
|
|
// TODO
|
|
// use openMP pragmas
|
|
// option to write in stdint?
|
|
// check NUC_SEQS view type? and score type (int or float if normalize)
|
|
// what's with multiple sequences/line columns?
|
|
// make function that put blobs in int16
|
|
|
|
|
|
/**
 * @brief Aligns every pair of sequences of one column and stores the results.
 *
 * Each sequence on line i of seq_column is aligned against each sequence on
 * line j > i (all unordered pairs over the lines used by the column). For the
 * k-th pair processed, the function writes on line k of the output view:
 *   - the id of the first sequence in id1_column,
 *   - the id of the second sequence in id2_column,
 *   - the alignment score in score_column.
 *
 * @param seq_view        View from which the sequences and their ids (ID_COLUMN) are read.
 * @param seq_column      Column holding the sequences; its returned data type must be OBI_SEQ.
 * @param score_view      View in which the ids and scores are written.
 * @param id1_column      Output column receiving the id of the first sequence of each pair.
 * @param id2_column      Output column receiving the id of the second sequence of each pair.
 * @param score_column    Output column receiving the score; its returned data type must be
 *                        OBI_FLOAT when normalize is true and OBI_INT otherwise.
 * @param threshold       Passed unchanged to generic_sse_banded_lcs_align().
 * @param normalize       Passed to generic_sse_banded_lcs_align(); also selects whether the
 *                        score is stored as an obifloat_t (true) or an obiint_t (false).
 * @param reference       Passed unchanged to generic_sse_banded_lcs_align().
 * @param similarity_mode Passed unchanged to generic_sse_banded_lcs_align().
 *
 * @retval 0 if all pairs were aligned and written.
 * @retval -1 if an error occurred. OBI_ALIGN_ERROR is set by this function for the
 *         column type checks and for a missing ID column; for the other failures the
 *         error code, if any, is the one left by the failing callee.
 */
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
                         Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
                         double threshold, bool normalize, int reference, bool similarity_mode)
{
    index_t         i, j, k;
    index_t         seq_count;
    char*           seq1 = NULL;
    char*           seq2 = NULL;
    const char*     id1;
    const char*     id2;
    double          score;
    OBIDMS_column_p id_column;

    k = 0;

    if ((seq_column->header)->returned_data_type != OBI_SEQ)
    {
        obi_set_errno(OBI_ALIGN_ERROR);
        obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
        return -1;
    }

    if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) ||
        (!normalize && ((score_column->header)->returned_data_type != OBI_INT)))
    {
        obi_set_errno(OBI_ALIGN_ERROR);
        obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
        return -1;
    }

    // Get the ID column pointer
    id_column = obi_view_get_column(seq_view, ID_COLUMN);
    if (id_column == NULL)
    {
        // NOTE(review): assumes obi_view_get_column() reports failure with NULL -- confirm.
        // Without this guard a missing ID column would be passed on to the id getters below.
        obi_set_errno(OBI_ALIGN_ERROR);
        obidebug(1, "\nError getting the ID column of the view containing the sequences to align");
        return -1;
    }

    seq_count = (seq_column->header)->lines_used;

    for (i=0; i < (seq_count - 1); i++)
    {
        for (j=i+1; j < seq_count; j++)
        {
            //fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);

            // Both buffers are released with free() below, so every failure from here on
            // goes through the error label instead of returning directly.
            seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0);
            seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0);

            if ((seq1 == NULL) || (seq2 == NULL))
            {
                obidebug(1, "\nError retrieving sequences to align");
                goto error;
            }

            // TODO kmer filter

            // Compute alignment score
            score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);

            // Get sequence ids
            id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);
            id2 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);

            // Write sequence ids in output view
            if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id1_column, k, 0, id1) < 0)
            {
                obidebug(1, "\nError writing id1 in a column");
                goto error;
            }

            if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id2_column, k, 0, id2) < 0)
            {
                obidebug(1, "\nError writing id2 in a column");
                goto error;
            }

            // Write score in output view, using the type matching the score column
            if (normalize)
            {
                if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
                {
                    obidebug(1, "\nError writing alignment score in a column");
                    goto error;
                }
            }
            else
            {
                if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
                {
                    obidebug(1, "\nError writing alignment score in a column");
                    goto error;
                }
            }

            free(seq1);
            free(seq2);
            // Reset so that the error label can never free a stale pointer
            seq1 = NULL;
            seq2 = NULL;

            k++;
        }
    }

    return 0;

error:
    // free(NULL) is a no-op, so this is safe whichever of the two retrievals failed
    free(seq1);
    free(seq2);
    return -1;
}
|
|
|
|
|
|
// TODO discuss if 2 input views or 2 columns or both possible
|
|
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2, // TODO it's implied both seq columns are in the same view but maybe it shouldn't
|
|
// Obiview_p score_view, OBIDMS_column_p score_column,
|
|
// double threshold, bool normalize, int reference, bool similarity_mode)
|
|
//{
|
|
// index_t i, j, k;
|
|
// index_t seq_count_1;
|
|
// index_t seq_count_2;
|
|
// char* seq1;
|
|
// char* seq2;
|
|
// double score;
|
|
//
|
|
// k = 0;
|
|
//
|
|
// if (((seq_column_1->header)->returned_data_type != OBI_SEQ) || ((seq_column_2->header)->returned_data_type != OBI_SEQ))
|
|
// {
|
|
// obi_set_errno(OBI_ALIGN_ERROR);
|
|
// obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
|
|
// return -1;
|
|
// }
|
|
//
|
|
// if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) ||
|
|
// (!normalize && ((score_column->header)->returned_data_type != OBI_INT)))
|
|
// {
|
|
// obi_set_errno(OBI_ALIGN_ERROR);
|
|
// obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
|
|
// return -1;
|
|
// }
|
|
//
|
|
// seq_count_1 = (seq_column_1->header)->lines_used;
|
|
// seq_count_2 = (seq_column_2->header)->lines_used;
|
|
//
|
|
// for (i=0; i < (seq_count_1 - 1); i++)
|
|
// {
|
|
// for (j=0; j < seq_count_2; j++)
|
|
// {
|
|
// //fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
|
|
//
|
|
// seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_1, i, 0);
|
|
// seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_2, j, 0);
|
|
//
|
|
// if ((seq1 == NULL) || (seq2 == NULL))
|
|
// {
|
|
// obidebug(1, "\nError retrieving sequences to align");
|
|
// return -1;
|
|
// }
|
|
//
|
|
// // TODO kmer filter
|
|
//
|
|
// score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
|
|
//
|
|
// if (normalize)
|
|
// {
|
|
// if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
|
|
// {
|
|
// obidebug(1, "\nError writing alignment score in a column");
|
|
// return -1;
|
|
// }
|
|
// }
|
|
// else
|
|
// {
|
|
// if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
|
|
// {
|
|
// obidebug(1, "\nError writing alignment score in a column");
|
|
// return -1;
|
|
// }
|
|
// }
|
|
//
|
|
// free(seq1);
|
|
// free(seq2);
|
|
//
|
|
// k++;
|
|
// }
|
|
// }
|
|
//
|
|
// return 0;
|
|
//}
|
|
|
|
|