2016-05-11 16:36:23 +02:00
/****************************************************************************
* Sequence alignment functions *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * @file obi_align.c
 * @author Celine Mercier
 * @date May 4th 2016
 * @brief Functions handling sequence alignments.
 */
# include <stdlib.h>
# include <stdio.h>
# include <stdbool.h>
# include "obidebug.h"
# include "obierrno.h"
# include "obitypes.h"
# include "obiview.h"
# include "sse_banded_LCS_alignment.h"
# define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO
2016-08-10 14:51:02 +02:00
// use openMP pragmas
// option to write in stdint?
// check NUC_SEQS view type? and score type (int or float if normalize)
// what's with multiple sequences/line columns?
2016-05-11 16:36:23 +02:00
// make a function that puts blobs in int16
2016-08-10 14:51:02 +02:00
/**
 * @brief Aligns every pair of sequences of one sequence column and writes, for
 *        each pair, both sequence ids and the alignment score in an output view.
 *
 * Every pair of lines (i, j) with i < j of the sequence column is aligned with
 * generic_sse_banded_lcs_align(). The pairs are numbered in processing order
 * starting at 0, and pair number k fills line k of the output view: the id of
 * sequence i goes in id1_column, the id of sequence j in id2_column and the
 * score in score_column.
 *
 * @param seq_view The view containing the sequence column and the ID column.
 * @param seq_column The column of sequences to align. Its returned data type must be OBI_SEQ.
 * @param score_view The view in which the ids and scores are written.
 * @param id1_column The column of score_view receiving the id of the first sequence of each pair.
 * @param id2_column The column of score_view receiving the id of the second sequence of each pair.
 * @param score_column The column of score_view receiving the scores. Its returned data type
 *                     must be OBI_FLOAT if normalize is true, OBI_INT otherwise.
 * @param threshold Passed unchanged to generic_sse_banded_lcs_align().
 * @param normalize Passed to generic_sse_banded_lcs_align(); also selects whether the score is
 *                  stored as an obifloat_t (true) or as an obiint_t (false).
 * @param reference Passed unchanged to generic_sse_banded_lcs_align().
 * @param similarity_mode Passed unchanged to generic_sse_banded_lcs_align().
 *
 * @retval 0 if all the pairs were aligned and written.
 * @retval -1 if an error occurred.
 */
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
                         Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
                         double threshold, bool normalize, int reference, bool similarity_mode)
{
    index_t         i, j, k;
    index_t         seq_count;
    char*           seq1 = NULL;
    char*           seq2 = NULL;
    const char*     id1;
    const char*     id2;
    double          score;
    OBIDMS_column_p id_column;

    if ((seq_column->header)->returned_data_type != OBI_SEQ)
    {
        obi_set_errno(OBI_ALIGN_ERROR);
        obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
        return -1;
    }

    if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) ||
        (!normalize && ((score_column->header)->returned_data_type != OBI_INT)))
    {
        obi_set_errno(OBI_ALIGN_ERROR);
        obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
        return -1;
    }

    // Get the ID column pointer
    id_column = obi_view_get_column(seq_view, ID_COLUMN);
    if (id_column == NULL)
    {
        // Without this guard a failed lookup would be passed on to the id getters below
        obidebug(1, "\nError getting the ID column of the view containing the sequences to align");
        return -1;
    }

    seq_count = (seq_column->header)->lines_used;

    k = 0;  // Index of the current pair, i.e. of the line written in the output view

    for (i=0; i < (seq_count - 1); i++)
    {
        for (j=i+1; j < seq_count; j++)
        {
            // The retrieved sequences are owned by this function and freed after each pair
            seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0);
            seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0);

            if ((seq1 == NULL) || (seq2 == NULL))
            {
                obidebug(1, "\nError retrieving sequences to align");
                goto error;  // One of the two sequences may have been retrieved and must be freed
            }

            // TODO kmer filter

            // Compute alignment score
            score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);

            // Get sequence ids
            id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);
            id2 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);

            // Write sequence ids in output view
            if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id1_column, k, 0, id1) < 0)
            {
                obidebug(1, "\nError writing id1 in a column");
                goto error;
            }

            if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id2_column, k, 0, id2) < 0)
            {
                obidebug(1, "\nError writing id2 in a column");
                goto error;
            }

            // Write score in output view, with the type matching the score column
            if (normalize)
            {
                if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
                {
                    obidebug(1, "\nError writing alignment score in a column");
                    goto error;
                }
            }
            else
            {
                if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
                {
                    obidebug(1, "\nError writing alignment score in a column");
                    goto error;
                }
            }

            free(seq1);
            free(seq2);
            // Reset so that the error path can never free a stale pointer
            seq1 = NULL;
            seq2 = NULL;

            k++;
        }
    }

    return 0;

error:
    // free(NULL) is a no-op, so this is safe whichever retrieval or write failed
    free(seq1);
    free(seq2);
    return -1;
}
2016-08-10 14:51:02 +02:00
// TODO discuss if 2 input views or 2 columns or both possible
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2, // TODO it's implied both seq columns are in the same view but maybe it shouldn't
// Obiview_p score_view, OBIDMS_column_p score_column,
// double threshold, bool normalize, int reference, bool similarity_mode)
//{
// index_t i, j, k;
// index_t seq_count_1;
// index_t seq_count_2;
// char* seq1;
// char* seq2;
// double score;
//
// k = 0;
//
// if (((seq_column_1->header)->returned_data_type != OBI_SEQ) || ((seq_column_2->header)->returned_data_type != OBI_SEQ))
// {
// obi_set_errno(OBI_ALIGN_ERROR);
// obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
// return -1;
// }
//
// if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) ||
// (!normalize && ((score_column->header)->returned_data_type != OBI_INT)))
// {
// obi_set_errno(OBI_ALIGN_ERROR);
// obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
// return -1;
// }
//
// seq_count_1 = (seq_column_1->header)->lines_used;
// seq_count_2 = (seq_column_2->header)->lines_used;
//
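// // TODO(review): the bound below looks copied from the one-column (triangular) loop; with j starting at 0
// //               it would skip the last line of seq_column_1 -- probably should be i < seq_count_1. Confirm before enabling.
// for (i=0; i < (seq_count_1 - 1); i++)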
// {
// for (j=0; j < seq_count_2; j++)
// {
// //fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
//
// seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_1, i, 0);
// seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_2, j, 0);
//
// if ((seq1 == NULL) || (seq2 == NULL))
// {
// obidebug(1, "\nError retrieving sequences to align");
// return -1;
// }
//
// // TODO kmer filter
//
// score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
//
// if (normalize)
// {
// if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
// {
// obidebug(1, "\nError writing alignment score in a column");
// return -1;
// }
// }
// else
// {
// if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
// {
// obidebug(1, "\nError writing alignment score in a column");
// return -1;
// }
// }
//
// free(seq1);
// free(seq2);
//
// k++;
// }
// }
//
// return 0;
//}