2016-05-11 16:36:23 +02:00
|
|
|
/****************************************************************************
|
|
|
|
* Sequence alignment functions *
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file obi_align.c
|
|
|
|
* @author Celine Mercier
|
|
|
|
* @date May 4th 2016
|
|
|
|
* @brief Functions handling sequence alignments.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
|
|
|
#include "obidebug.h"
|
|
|
|
#include "obierrno.h"
|
|
|
|
#include "obitypes.h"
|
|
|
|
#include "obiview.h"
|
|
|
|
#include "sse_banded_LCS_alignment.h"
|
|
|
|
|
|
|
|
|
|
|
|
#define DEBUG_LEVEL 0	// TODO has to be defined somewhere else (cython compile flag?)
|
|
|
|
|
|
|
|
|
|
|
|
// TODO
|
2016-06-16 11:26:54 +02:00
|
|
|
// use OpenMP pragmas: keep scores in memory and write them at the end? (should normally be fine with the index)
|
2016-05-11 16:36:23 +02:00
|
|
|
// write everything with stdint types?
|
|
|
|
// check NUC_SEQS and score type (int or float if normalize)
|
|
|
|
// what's with multiple sequence/line columns?
|
|
|
|
// make function that put blobs in int16
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Aligns every pair of sequences of one column and stores the scores.
 *
 * The sequence at line i is aligned with the sequence at every line j > i using
 * generic_sse_banded_lcs_align(). Each score is written to element 0 of the next
 * line of the score column, starting at line 0, in the order the pairs are visited.
 *
 * @param seq_view        View through which the sequences are read.
 * @param seq_column      Column holding the sequences; its returned data type must be OBI_SEQ.
 * @param score_view      View through which the scores are written.
 * @param score_column    Column receiving the scores; its returned data type must be
 *                        OBI_FLOAT when normalize is true and OBI_INT otherwise.
 * @param threshold       Passed unchanged to generic_sse_banded_lcs_align().
 * @param normalize       Passed to generic_sse_banded_lcs_align(); also selects whether the
 *                        score is stored as an obifloat_t (true) or an obiint_t (false).
 * @param reference       Passed unchanged to generic_sse_banded_lcs_align().
 * @param similarity_mode Passed unchanged to generic_sse_banded_lcs_align().
 *
 * @retval 0 if all the pairs were aligned and all the scores written.
 * @retval -1 if a column has an unexpected type, a sequence could not be retrieved
 *            or a score could not be written.
 */
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview_p score_view, OBIDMS_column_p score_column, double threshold, bool normalize, int reference, bool similarity_mode)
{
	index_t first_idx;
	index_t second_idx;
	index_t score_idx;
	index_t line_count;
	char*   first_seq;
	char*   second_seq;
	double  pair_score;
	bool    score_type_ok;
	bool    write_failed;

	// Only columns of sequences can be aligned
	if ((seq_column->header)->returned_data_type != OBI_SEQ)
	{
		obi_set_errno(OBI_ALIGN_ERROR);
		obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
		return -1;
	}

	// Normalized scores are stored as floats, raw scores as integers
	if (normalize)
		score_type_ok = ((score_column->header)->returned_data_type == OBI_FLOAT);
	else
		score_type_ok = ((score_column->header)->returned_data_type == OBI_INT);

	if (!score_type_ok)
	{
		obi_set_errno(OBI_ALIGN_ERROR);
		obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
		return -1;
	}

	line_count = (seq_column->header)->lines_used;
	score_idx  = 0;

	// Visit each unordered pair (first_idx, second_idx) with first_idx < second_idx
	for (first_idx = 0; first_idx < (line_count - 1); first_idx++)
	{
		for (second_idx = first_idx + 1; second_idx < line_count; second_idx++)
		{
			first_seq  = obi_column_get_obiseq_with_elt_idx_in_view(seq_view, seq_column, first_idx, 0);
			second_seq = obi_column_get_obiseq_with_elt_idx_in_view(seq_view, seq_column, second_idx, 0);

			if (!first_seq || !second_seq)
			{
				obidebug(1, "\nError retrieving sequences to align");
				return -1;
			}

			// TODO filter

			pair_score = generic_sse_banded_lcs_align(first_seq, second_seq, threshold, normalize, reference, similarity_mode);

			// Store the score with the type matching the score column
			if (normalize)
				write_failed = (obi_column_set_obifloat_with_elt_idx_in_view(score_view, score_column, score_idx, 0, (obifloat_t) pair_score) < 0);
			else
				write_failed = (obi_column_set_obiint_with_elt_idx_in_view(score_view, score_column, score_idx, 0, (obiint_t) pair_score) < 0);

			if (write_failed)
			{
				obidebug(1, "\nError writing alignment score in a column");
				return -1;
			}

			score_idx++;
		}
	}

	return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Placeholder for the alignment of two columns (not implemented yet).
 *
 * None of the parameters is used for now: no alignment is performed and
 * nothing is written to the score column.
 *
 * @param seq_column_1    Currently unused.
 * @param seq_column_2    Currently unused.
 * @param score_column    Currently unused.
 * @param threshold       Currently unused.
 * @param normalize       Currently unused.
 * @param reference       Currently unused.
 * @param similarity_mode Currently unused.
 *
 * @return Always 0.
 */
int obi_align_two_columns(OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2, OBIDMS_column_p score_column, double threshold, bool normalize, int reference, bool similarity_mode)
{
	// TODO implement; until then the parameters are deliberately left unused
	(void) seq_column_1;
	(void) seq_column_2;
	(void) score_column;
	(void) threshold;
	(void) normalize;
	(void) reference;
	(void) similarity_mode;

	return 0;
}
|