/**************************************************************************** * Sequence alignment functions * ****************************************************************************/ /** * @file obi_align.c * @author Celine Mercier * @date May 4th 2016 * @brief Functions handling sequence alignments. */ #include #include #include #include "obidebug.h" #include "obierrno.h" #include "obitypes.h" #include "obiview.h" #include "sse_banded_LCS_alignment.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) // TODO // use openMP pragmas // option pour ecrire en stdint? // check NUC_SEQS view type? and score type (int or float if normalize) // what's with multiple sequences/line columns? // make function that put blobs in int16 int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column, double threshold, bool normalize, int reference, bool similarity_mode) { index_t i, j, k; index_t seq_count; char* seq1; char* seq2; const char* id1; const char* id2; double score; OBIDMS_column_p id_column; k = 0; if ((seq_column->header)->returned_data_type != OBI_SEQ) { obi_set_errno(OBI_ALIGN_ERROR); obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ"); return -1; } if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) || (!normalize && ((score_column->header)->returned_data_type != OBI_INT))) { obi_set_errno(OBI_ALIGN_ERROR); obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type"); return -1; } // Get the ID column pointer id_column = obi_view_get_column(seq_view, ID_COLUMN); seq_count = (seq_column->header)->lines_used; for (i=0; i < (seq_count - 1); i++) { for (j=i+1; j < seq_count; j++) { //fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k); seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0); seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0); if ((seq1 == NULL) || (seq2 == NULL)) { obidebug(1, "\nError retrieving sequences to align"); return -1; } // TODO kmer filter // Compute alignment score score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode); // Get sequence ids id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); id2 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0); // Write sequence ids in output view if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id1_column, k, 0, id1) < 0) { obidebug(1, "\nError writing id1 in a column"); return -1; } if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id2_column, k, 0, id2) < 0) { obidebug(1, "\nError writing id2 in a column"); return -1; } // Write score in output view if (normalize) { if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0) { obidebug(1, "\nError writing alignment score in a column"); return -1; } } else { if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0) { obidebug(1, "\nError writing alignment score in a column"); return -1; } } free(seq1); free(seq2); k++; } } return 0; } // TODO discuss if 2 input views or 2 columns or both possible //int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2, // TODO it's implied both seq columns are in the same view but maybe it shouldn't // Obiview_p score_view, OBIDMS_column_p score_column, // double threshold, bool normalize, int reference, bool similarity_mode) //{ // index_t i, j, k; // index_t seq_count_1; // index_t seq_count_2; // char* seq1; // char* seq2; // double score; // // k = 0; // // if (((seq_column_1->header)->returned_data_type != OBI_SEQ) || ((seq_column_2->header)->returned_data_type != OBI_SEQ)) // { // obi_set_errno(OBI_ALIGN_ERROR); // obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ"); // return -1; // } // // if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) || // (!normalize && ((score_column->header)->returned_data_type != OBI_INT))) // { // obi_set_errno(OBI_ALIGN_ERROR); // obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type"); // return -1; // } // // seq_count_1 = (seq_column_1->header)->lines_used; // seq_count_2 = (seq_column_2->header)->lines_used; // // for (i=0; i < (seq_count_1 - 1); i++) // { // for (j=0; j < seq_count_2; j++) // { // //fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k); // // seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_1, i, 0); // seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_2, j, 0); // // if ((seq1 == NULL) || (seq2 == NULL)) // { // obidebug(1, "\nError retrieving sequences to align"); // return -1; // } // // // TODO kmer filter // // score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode); // // if (normalize) // { // if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0) // { // obidebug(1, "\nError writing alignment score in a column"); // return -1; // } // } // else // { // if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0) // { // obidebug(1, "\nError writing alignment score in a column"); // return -1; // } // } // // free(seq1); // free(seq2); // // k++; // } // } // // return 0; //}