2016-05-11 16:36:23 +02:00
/****************************************************************************
* Sequence alignment functions *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* @ file obi_align . c
* @ author Celine Mercier
* @ date May 4 th 2016
* @ brief Functions handling sequence alignments .
*/
# include <stdlib.h>
# include <stdio.h>
# include <stdbool.h>
2016-12-12 11:58:59 +01:00
# include "obi_align.h"
2016-05-11 16:36:23 +02:00
# include "obidebug.h"
# include "obierrno.h"
# include "obitypes.h"
# include "obiview.h"
# include "sse_banded_LCS_alignment.h"
2016-11-18 16:29:28 +01:00
# include "upperband.h"
# include "obiblob.h"
2016-05-11 16:36:23 +02:00
# define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO
2016-08-10 14:51:02 +02:00
// use openMP pragmas
2016-05-11 16:36:23 +02:00
2016-12-13 17:18:12 +01:00
/**************************************************************************
*
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* @ brief Internal function creating the columns where the alignment results are written .
*
* @ param output_view A pointer on the writable view where the columns should be created .
* @ param id1_indexer_name The name of the indexer where the id of the 1 st sequence aligned is indexed .
* @ param id2_indexer_name The name of the indexer where the id of the 2 nd sequence aligned is indexed .
* @ param seq1_indexer_name The name of the indexer where the 1 st sequence aligned is indexed ( needed only if print_seq is True ) .
* @ param seq2_indexer_name The name of the indexer where the 2 nd sequence aligned is indexed ( needed only if print_seq is True ) .
* @ param print_seq A boolean indicating whether the aligned sequences should be copied in the output view .
* @ param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view .
* @ param normalize Whether the score should be normalized with the reference sequence length .
* @ param reference The reference length . 0 : The alignement length ; 1 : The longest sequence ' s length ; 2 : The shortest sequence ' s length .
* @ param similarity_mode Whether the score should be expressed in similarity ( true ) or distance ( false ) .
*
* @ retval 0 if the operation was successfully completed .
* @ retval - 1 if an error occurred .
*
* @ since December 2016
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int create_alignment_output_columns ( Obiview_p output_view ,
const char * id1_indexer_name ,
const char * id2_indexer_name ,
const char * seq1_indexer_name ,
const char * seq2_indexer_name ,
bool print_seq , bool print_count ,
bool normalize , int reference , bool similarity_mode ) ;
/**
* @ brief Internal function printing the result of one alignment to the output view .
*
* @ param output_view A pointer on the writable view where the columns should be created .
* @ param line The line in the output view where the result should be written .
* @ param idx1_column A pointer on the column where the index referring to the line of the first sequence aligned in the input view should be written .
* @ param idx2_column A pointer on the column where the index referring to the line of the second sequence aligned in the input view should be written .
* @ param idx1 The index referring to the line of the first sequence aligned in the input view .
* @ param idx2 The index referring to the line of the second sequence aligned in the input view .
* @ param id1_column A pointer on the column where the identifier of the first sequence aligned should be written .
* @ param id2_column A pointer on the column where the identifier of the second sequence aligned should be written .
* @ param id1_idx The index of the identifier of the first sequence aligned .
* @ param id2_idx The index of the identifier of the second sequence aligned .
* @ param print_seq A boolean indicating whether the aligned sequences should be copied in the output view .
* @ param seq1_column A pointer on the column where the first sequence aligned should be written .
* @ param seq2_column A pointer on the column where the second sequence aligned should be written .
* @ param seq1_idx The index of the sequence of the first sequence aligned .
* @ param seq2_idx The index of the sequence of the second sequence aligned .
* @ param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view . // Count columns not implement yet
* @ param count1_column A pointer on the column where the count of the first sequence aligned should be written .
* @ param count2_column A pointer on the column where the count of the second sequence aligned should be written .
* @ param count1 The count of the first sequence aligned .
* @ param count2 The count of the second sequence aligned .
* @ param ali_length_column A pointer on the column where the alignment length should be written .
* @ param ali_length The alignment length .
* @ param lcs_length_column A pointer on the column where the LCS length should be written .
* @ param lcs_length The LCS length .
* @ param score_column A pointer on the column where the score should be written .
* @ param score The alignment score .
* @ param reference The reference length . 0 : The alignment length ; 1 : The longest sequence ' s length ; 2 : The shortest sequence ' s length .
* @ param normalize Whether the score should be normalized with the reference sequence length .
* @ param similarity_mode Whether the score should be expressed in similarity ( true ) or distance ( false ) .
*
* @ retval 0 if the operation was successfully completed .
* @ retval - 1 if an error occurred .
*
* @ since December 2016
* @ author Celine Mercier ( celine . mercier @ metabarcoding . org )
*/
static int print_alignment_result ( Obiview_p output_view ,
index_t line ,
OBIDMS_column_p idx1_column ,
OBIDMS_column_p idx2_column ,
index_t idx1 ,
index_t idx2 ,
OBIDMS_column_p id1_column ,
OBIDMS_column_p id2_column ,
index_t id1_idx ,
index_t id2_idx ,
bool print_seq ,
OBIDMS_column_p seq1_column ,
OBIDMS_column_p seq2_column ,
index_t seq1_idx ,
index_t seq2_idx ,
// bool print_count,
// OBIDMS_column_p count1_column,
// OBIDMS_column_p count2_column,
// int count1,
// int count2,
OBIDMS_column_p ali_length_column ,
int ali_length ,
OBIDMS_column_p lcs_length_column ,
int lcs_length ,
OBIDMS_column_p score_column ,
double score ,
int reference ,
bool normalize ,
bool similarity_mode ) ;
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static int create_alignment_output_columns ( Obiview_p output_view ,
const char * id1_indexer_name ,
const char * id2_indexer_name ,
const char * seq1_indexer_name ,
const char * seq2_indexer_name ,
bool print_seq , bool print_count ,
bool normalize , int reference , bool similarity_mode )
{
// Create the column for the ids of the 1st sequence aligned
if ( obi_view_add_column ( output_view , ID1_COLUMN_NAME , - 1 , ID1_COLUMN_NAME , OBI_STR , 0 , 1 , NULL , id1_indexer_name , NULL , - 1 , ID1_COLUMN_COMMENTS , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the first column for the sequence ids when aligning " ) ;
return - 1 ;
}
// Create the column for the ids of the 2nd sequence aligned
if ( obi_view_add_column ( output_view , ID2_COLUMN_NAME , - 1 , ID2_COLUMN_NAME , OBI_STR , 0 , 1 , NULL , id2_indexer_name , NULL , - 1 , ID2_COLUMN_COMMENTS , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the second column for the sequence ids when aligning " ) ;
return - 1 ;
}
// Create the column for the index (in the input view) of the first sequences aligned
if ( obi_view_add_column ( output_view , IDX1_COLUMN_NAME , - 1 , IDX1_COLUMN_NAME , OBI_INT , 0 , 1 , NULL , NULL , NULL , - 1 , IDX1_COLUMN_COMMENTS , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the first column for the sequence indices when aligning " ) ;
return - 1 ;
}
// Create the column for the index (in the input view) of the second sequences aligned
if ( obi_view_add_column ( output_view , IDX2_COLUMN_NAME , - 1 , IDX2_COLUMN_NAME , OBI_INT , 0 , 1 , NULL , NULL , NULL , - 1 , IDX2_COLUMN_COMMENTS , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the second column for the sequence indices when aligning " ) ;
return - 1 ;
}
// Create the column for the LCS length
if ( obi_view_add_column ( output_view , LCS_LENGTH_COLUMN_NAME , - 1 , LCS_LENGTH_COLUMN_NAME , OBI_INT , 0 , 1 , NULL , NULL , NULL , - 1 , LCS_LENGTH_COLUMN_COMMENTS , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the column for the LCS length when aligning " ) ;
return - 1 ;
}
// Create the column for the alignment length if it is computed
if ( ( reference = = ALILEN ) & & ( normalize | | ! similarity_mode ) )
{
if ( obi_view_add_column ( output_view , ALI_LENGTH_COLUMN_NAME , - 1 , ALI_LENGTH_COLUMN_NAME , OBI_INT , 0 , 1 , NULL , NULL , NULL , - 1 , ALI_LENGTH_COLUMN_COMMENTS , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the column for the alignment length when aligning " ) ;
return - 1 ;
}
}
// Create the column for the alignment score
if ( normalize )
{
if ( obi_view_add_column ( output_view , SCORE_COLUMN_NAME , - 1 , SCORE_COLUMN_NAME , OBI_FLOAT , 0 , 1 , NULL , NULL , NULL , - 1 , SCORE_COLUMN_NAME , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the column for the score when aligning " ) ;
return - 1 ;
}
}
else
{
if ( obi_view_add_column ( output_view , SCORE_COLUMN_NAME , - 1 , SCORE_COLUMN_NAME , OBI_INT , 0 , 1 , NULL , NULL , NULL , - 1 , SCORE_COLUMN_NAME , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the column for the score when aligning " ) ;
return - 1 ;
}
}
if ( print_seq )
{
// Create the column for the first sequences aligned
if ( obi_view_add_column ( output_view , SEQ1_COLUMN_NAME , - 1 , SEQ1_COLUMN_NAME , OBI_SEQ , 0 , 1 , NULL , seq1_indexer_name , NULL , - 1 , SEQ1_COLUMN_COMMENTS , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the first column for the sequences when aligning " ) ;
return - 1 ;
}
// Create the column for the second sequences aligned
if ( obi_view_add_column ( output_view , SEQ2_COLUMN_NAME , - 1 , SEQ2_COLUMN_NAME , OBI_SEQ , 0 , 1 , NULL , seq2_indexer_name , NULL , - 1 , SEQ2_COLUMN_COMMENTS , true ) < 0 )
{
obidebug ( 1 , " \n Error creating the second column for the sequences when aligning " ) ;
return - 1 ;
}
}
// if (print_count) // TODO count columns not implemented yet
// {
// // Create the column for the count of the first sequences aligned
// if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, COUNT1_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0)
// {
// obidebug(1, "\nError creating the first column for the sequence counts when aligning");
// return -1;
// }
//
// // Create the column for the count of the second sequences aligned
// if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, COUNT2_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0)
// {
// obidebug(1, "\nError creating the second column for the sequence counts when aligning");
// return -1;
// }
// }
return 0 ;
}
static int print_alignment_result ( Obiview_p output_view ,
index_t line ,
OBIDMS_column_p idx1_column ,
OBIDMS_column_p idx2_column ,
index_t idx1 ,
index_t idx2 ,
OBIDMS_column_p id1_column ,
OBIDMS_column_p id2_column ,
index_t id1_idx ,
index_t id2_idx ,
bool print_seq ,
OBIDMS_column_p seq1_column ,
OBIDMS_column_p seq2_column ,
index_t seq1_idx ,
index_t seq2_idx ,
// bool print_count,
// OBIDMS_column_p count1_column,
// OBIDMS_column_p count2_column,
// int count1,
// int count2,
OBIDMS_column_p ali_length_column ,
int ali_length ,
OBIDMS_column_p lcs_length_column ,
int lcs_length ,
OBIDMS_column_p score_column ,
double score ,
int reference ,
bool normalize ,
bool similarity_mode )
{
// Write line indices of the input view in the output view (to easily refer to the input sequences from the output view)
if ( obi_set_int_with_elt_idx_and_col_p_in_view ( output_view , idx1_column , line , 0 , idx1 ) < 0 )
{
obidebug ( 1 , " \n Error writing idx1 in a column " ) ;
return - 1 ;
}
if ( obi_set_int_with_elt_idx_and_col_p_in_view ( output_view , idx2_column , line , 0 , idx2 ) < 0 )
{
obidebug ( 1 , " \n Error writing idx2 in a column " ) ;
return - 1 ;
}
// Write ids in output view
if ( obi_set_index_with_elt_idx_and_col_p_in_view ( output_view , id1_column , line , 0 , id1_idx ) < 0 )
{
obidebug ( 1 , " \n Error writing id1 in a column " ) ;
return - 1 ;
}
if ( obi_set_index_with_elt_idx_and_col_p_in_view ( output_view , id2_column , line , 0 , id2_idx ) < 0 )
{
obidebug ( 1 , " \n Error writing id2 in a column " ) ;
return - 1 ;
}
// Write the sequences if needed
if ( print_seq )
{
if ( obi_set_index_with_elt_idx_and_col_p_in_view ( output_view , seq1_column , line , 0 , seq1_idx ) < 0 )
{
obidebug ( 1 , " \n Error writing seq1 in a column " ) ;
return - 1 ;
}
if ( obi_set_index_with_elt_idx_and_col_p_in_view ( output_view , seq2_column , line , 0 , seq2_idx ) < 0 )
{
obidebug ( 1 , " \n Error writing seq2 in a column " ) ;
return - 1 ;
}
}
// // Write the counts if needed // TODO count columns not implemented yet
// if (print_count)
// {
// if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0)
// {
// obidebug(1, "\nError writing count1 in a column");
// return -1;
// }
//
// if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0)
// {
// obidebug(1, "\nError writing count2 in a column");
// return -1;
// }
// }
// Write the alignment length if it was computed
if ( ( reference = = ALILEN ) & & ( normalize | | ! similarity_mode ) )
{
if ( obi_set_int_with_elt_idx_and_col_p_in_view ( output_view , ali_length_column , line , 0 , ali_length ) < 0 )
{
obidebug ( 1 , " \n Error writing alignment length in a column " ) ;
return - 1 ;
}
}
// Write the LCS length
if ( obi_set_int_with_elt_idx_and_col_p_in_view ( output_view , lcs_length_column , line , 0 , lcs_length ) < 0 )
{
obidebug ( 1 , " \n Error writing LCS length in a column " ) ;
return - 1 ;
}
// Write score
if ( normalize )
{
if ( obi_set_float_with_elt_idx_and_col_p_in_view ( output_view , score_column , line , 0 , ( obifloat_t ) score ) < 0 )
{
obidebug ( 1 , " \n Error writing alignment score in a column " ) ;
return - 1 ;
}
}
else
{
if ( obi_set_int_with_elt_idx_and_col_p_in_view ( output_view , score_column , line , 0 , ( obiint_t ) score ) < 0 )
{
obidebug ( 1 , " \n Error writing alignment score in a column " ) ;
return - 1 ;
}
}
return 0 ;
}
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2016-12-12 11:58:59 +01:00
int obi_lcs_align_one_column ( OBIDMS_p dms , const char * seq_view_name , const char * seq_column_name , const char * seq_elt_name ,
const char * id_column_name ,
const char * output_view_name , const char * output_view_comments ,
bool print_seq , bool print_count ,
double threshold , bool normalize , int reference , bool similarity_mode )
2016-05-11 16:36:23 +02:00
{
2016-11-18 16:29:28 +01:00
index_t i , j , k ;
index_t seq_count ;
2016-12-12 11:58:59 +01:00
index_t id1_idx , id2_idx ;
index_t seq1_idx , seq2_idx ;
2016-11-18 16:29:28 +01:00
double score ;
2016-12-12 11:58:59 +01:00
int lcs_length ;
int ali_length ;
2016-11-18 16:29:28 +01:00
Kmer_table_p ktable ;
Obi_blob_p blob1 ;
Obi_blob_p blob2 ;
int lcs_min ;
2016-12-12 11:58:59 +01:00
index_t seq_elt_idx ;
Obiview_p seq_view = NULL ;
Obiview_p output_view = NULL ;
OBIDMS_column_p iseq_column = NULL ;
OBIDMS_column_p id_column ;
OBIDMS_column_p id1_column = NULL ;
OBIDMS_column_p id2_column = NULL ;
OBIDMS_column_p seq1_column = NULL ;
OBIDMS_column_p seq2_column = NULL ;
//OBIDMS_column_p count1_column = NULL;
//OBIDMS_column_p count2_column = NULL;
OBIDMS_column_p idx1_column = NULL ;
OBIDMS_column_p idx2_column = NULL ;
OBIDMS_column_p lcs_length_column = NULL ;
OBIDMS_column_p ali_length_column = NULL ;
OBIDMS_column_p score_column = NULL ;
2016-05-11 16:36:23 +02:00
k = 0 ;
2016-12-12 11:58:59 +01:00
// Open input view
seq_view = obi_open_view ( dms , seq_view_name ) ;
if ( seq_view = = NULL )
{
obidebug ( 1 , " \n Error opening the input view to align " ) ;
return - 1 ;
}
// Open the sequence column to align
// If a column name wasn't given, open default sequence column
if ( strcmp ( seq_column_name , " " ) = = 0 )
2016-11-29 16:52:41 +01:00
{
2016-12-12 11:58:59 +01:00
if ( strcmp ( ( seq_view - > infos ) - > view_type , VIEW_TYPE_NUC_SEQS ) = = 0 )
iseq_column = obi_view_get_column ( seq_view , NUC_SEQUENCE_COLUMN ) ;
else
{
obi_set_errno ( OBI_ALIGN_ERROR ) ;
obidebug ( 1 , " \n Error: no column given to align " ) ;
2016-11-29 16:52:41 +01:00
return - 1 ;
2016-12-12 11:58:59 +01:00
}
2016-11-29 16:52:41 +01:00
}
2016-12-12 11:58:59 +01:00
else
iseq_column = obi_view_get_column ( seq_view , seq_column_name ) ;
if ( iseq_column = = NULL )
2016-05-11 16:36:23 +02:00
{
2016-12-12 11:58:59 +01:00
obidebug ( 1 , " \n Error getting the column to align " ) ;
2016-05-11 16:36:23 +02:00
return - 1 ;
}
2016-12-12 11:58:59 +01:00
// Get element index of the sequence to align in each line to compute it only once
if ( ( strcmp ( seq_elt_name , " " ) ! = 0 ) & & ( seq_elt_name ! = NULL ) )
2016-05-11 16:36:23 +02:00
{
2016-12-12 11:58:59 +01:00
seq_elt_idx = obi_column_get_element_index_from_name ( iseq_column , seq_elt_name ) ;
if ( seq_elt_idx = = OBIIdx_NA )
{
obidebug ( 1 , " \n Error getting the sequence index in a column line when aligning " ) ;
return - 1 ;
}
}
else
seq_elt_idx = 0 ;
// Open the ID column, containing the identifiers of the sequences to align
// If a column name wasn't given, open default ID column
if ( strcmp ( id_column_name , " " ) = = 0 )
{
if ( strcmp ( ( seq_view - > infos ) - > view_type , VIEW_TYPE_NUC_SEQS ) = = 0 )
id_column = obi_view_get_column ( seq_view , ID_COLUMN ) ;
else
{
obi_set_errno ( OBI_ALIGN_ERROR ) ;
obidebug ( 1 , " \n Error: no ID column given " ) ;
return - 1 ;
}
}
else
id_column = obi_view_get_column ( seq_view , id_column_name ) ;
if ( id_column = = NULL )
{
obidebug ( 1 , " \n Error getting the ID column " ) ;
2016-05-11 16:36:23 +02:00
return - 1 ;
}
2016-12-12 11:58:59 +01:00
// Create the output view
output_view = obi_new_view ( dms , output_view_name , NULL , NULL , output_view_comments ) ;
if ( output_view = = NULL )
2016-11-29 16:15:02 +01:00
{
2016-12-12 11:58:59 +01:00
obidebug ( 1 , " \n Error creating the output view when aligning " ) ;
return - 1 ;
}
// Create the output columns
2016-12-13 17:18:12 +01:00
if ( create_alignment_output_columns ( output_view ,
( id_column - > header ) - > indexer_name , ( id_column - > header ) - > indexer_name ,
( iseq_column - > header ) - > indexer_name , ( iseq_column - > header ) - > indexer_name ,
print_seq , print_count , normalize , reference , similarity_mode ) < 0 )
2016-12-12 11:58:59 +01:00
return - 1 ;
id1_column = obi_view_get_column ( output_view , ID1_COLUMN_NAME ) ;
id2_column = obi_view_get_column ( output_view , ID2_COLUMN_NAME ) ;
idx1_column = obi_view_get_column ( output_view , IDX1_COLUMN_NAME ) ;
idx2_column = obi_view_get_column ( output_view , IDX2_COLUMN_NAME ) ;
2016-12-13 17:18:12 +01:00
lcs_length_column = obi_view_get_column ( output_view , LCS_LENGTH_COLUMN_NAME ) ;
2016-12-12 11:58:59 +01:00
if ( ( reference = = ALILEN ) & & ( normalize | | ! similarity_mode ) )
ali_length_column = obi_view_get_column ( output_view , ALI_LENGTH_COLUMN_NAME ) ;
score_column = obi_view_get_column ( output_view , SCORE_COLUMN_NAME ) ;
if ( print_seq )
{
seq1_column = obi_view_get_column ( output_view , SEQ1_COLUMN_NAME ) ;
seq2_column = obi_view_get_column ( output_view , SEQ2_COLUMN_NAME ) ;
}
// if (print_count) // TODO count columns not implemented yet
// {
// count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME);
// count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME);
// }
2016-11-18 16:29:28 +01:00
// Build kmer tables
2016-12-12 11:58:59 +01:00
ktable = hash_seq_column ( seq_view , iseq_column , seq_elt_idx ) ;
2016-11-18 16:29:28 +01:00
if ( ktable = = NULL )
{
obi_set_errno ( OBI_ALIGN_ERROR ) ;
obidebug ( 1 , " \n Error building kmer tables before aligning " ) ;
return - 1 ;
}
2016-12-12 11:58:59 +01:00
seq_count = ( iseq_column - > header ) - > lines_used ;
2016-05-11 16:36:23 +02:00
for ( i = 0 ; i < ( seq_count - 1 ) ; i + + )
{
2016-11-18 16:29:28 +01:00
if ( i % 100 = = 0 )
fprintf ( stderr , " \r Done : %f %% " , ( i / ( float ) seq_count ) * 100 ) ;
2016-05-11 16:36:23 +02:00
for ( j = i + 1 ; j < seq_count ; j + + )
{
2016-12-12 11:58:59 +01:00
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view ( seq_view , iseq_column , i , seq_elt_idx ) ;
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view ( seq_view , iseq_column , j , seq_elt_idx ) ;
seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view ( seq_view , iseq_column , i , seq_elt_idx ) ;
seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view ( seq_view , iseq_column , j , seq_elt_idx ) ;
2016-05-11 16:36:23 +02:00
2016-11-18 16:29:28 +01:00
if ( ( blob1 = = NULL ) | | ( blob2 = = NULL ) )
2016-05-11 16:36:23 +02:00
{
obidebug ( 1 , " \n Error retrieving sequences to align " ) ;
return - 1 ;
}
2016-11-28 11:39:29 +01:00
// Check if the sequences are identical in a quick way (same index in the same indexer)
2016-12-12 11:58:59 +01:00
if ( obi_get_index_with_elt_idx_and_col_p_in_view ( seq_view , iseq_column , i , seq_elt_idx ) = = obi_get_index_with_elt_idx_and_col_p_in_view ( seq_view , iseq_column , j , seq_elt_idx ) )
2016-11-28 11:39:29 +01:00
{
if ( similarity_mode & & normalize )
score = 1.0 ;
else if ( ! similarity_mode )
score = 0.0 ;
else
score = blob1 - > length_decoded_value ;
}
2016-08-10 14:51:02 +02:00
2016-11-28 11:39:29 +01:00
else // the sequences aren't identical
{
// kmer filter
align_filters ( ktable , blob1 , blob2 , i , j , threshold , normalize , reference , similarity_mode , & score , & lcs_min , false ) ;
// Compute alignment score
if ( ( threshold = = 0 ) | | ( score = = - 1.0 ) ) // no threshold, or filter passed: align
2016-12-12 11:58:59 +01:00
score = obiblob_sse_banded_lcs_align ( blob1 , blob2 , threshold , normalize , reference , similarity_mode , & lcs_length , & ali_length ) ;
2016-11-28 11:39:29 +01:00
}
2016-05-11 16:36:23 +02:00
2016-11-18 16:29:28 +01:00
if ( ( score > = 0 ) & & ( ( ( normalize | | similarity_mode ) & & ( score > = threshold ) ) | | ( ( ! similarity_mode & & ! normalize ) & & ( score < = threshold ) ) ) )
2016-12-12 11:58:59 +01:00
{ // Print result // TODO make separate function maybe
2016-08-10 14:51:02 +02:00
2016-12-12 11:58:59 +01:00
// Get ids idx
id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view ( seq_view , id_column , i , 0 ) ; // TODO Could there be multiple IDs per line?
id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view ( seq_view , id_column , j , 0 ) ;
2016-08-10 14:51:02 +02:00
2016-12-13 17:18:12 +01:00
if ( print_alignment_result ( output_view , k ,
idx1_column , idx2_column , i , j ,
id1_column , id2_column , id1_idx , id2_idx ,
print_seq , seq1_column , seq2_column , seq1_idx , seq2_idx ,
//print_count, count1_column, count2_column, count1, count2,
ali_length_column , ali_length ,
lcs_length_column , lcs_length ,
score_column , score ,
reference , normalize , similarity_mode ) < 0 )
2016-05-11 16:36:23 +02:00
return - 1 ;
2016-08-10 14:51:02 +02:00
2016-11-18 16:29:28 +01:00
k + + ;
}
2016-05-11 16:36:23 +02:00
}
}
2016-12-12 11:58:59 +01:00
// Close views
if ( obi_close_view ( seq_view ) < 0 )
{
obidebug ( 1 , " \n Error closing the input view after aligning " ) ;
return - 1 ;
}
if ( obi_close_view ( output_view ) < 0 )
{
obidebug ( 1 , " \n Error closing the output view after aligning " ) ;
return - 1 ;
}
2016-11-18 16:29:28 +01:00
free_kmer_tables ( ktable , seq_count ) ;
2016-05-11 16:36:23 +02:00
return 0 ;
}
2016-08-10 14:51:02 +02:00
// TODO discuss if 2 input views or 2 columns or both possible
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2, // TODO it's implied both seq columns are in the same view but maybe it shouldn't
// Obiview_p score_view, OBIDMS_column_p score_column,
// double threshold, bool normalize, int reference, bool similarity_mode)
//{
// index_t i, j, k;
// index_t seq_count_1;
// index_t seq_count_2;
// char* seq1;
// char* seq2;
// double score;
//
// k = 0;
//
// if (((seq_column_1->header)->returned_data_type != OBI_SEQ) || ((seq_column_2->header)->returned_data_type != OBI_SEQ))
// {
// obi_set_errno(OBI_ALIGN_ERROR);
// obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
// return -1;
// }
//
// if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) ||
// (!normalize && ((score_column->header)->returned_data_type != OBI_INT)))
// {
// obi_set_errno(OBI_ALIGN_ERROR);
// obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
// return -1;
// }
//
// seq_count_1 = (seq_column_1->header)->lines_used;
// seq_count_2 = (seq_column_2->header)->lines_used;
//
// for (i=0; i < (seq_count_1 - 1); i++)
// {
// for (j=0; j < seq_count_2; j++)
// {
// //fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
//
// seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_1, i, 0);
// seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_2, j, 0);
//
// if ((seq1 == NULL) || (seq2 == NULL))
// {
// obidebug(1, "\nError retrieving sequences to align");
// return -1;
// }
//
// // TODO kmer filter
//
// score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
//
// if (normalize)
// {
// if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
// {
// obidebug(1, "\nError writing alignment score in a column");
// return -1;
// }
// }
// else
// {
// if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
// {
// obidebug(1, "\nError writing alignment score in a column");
// return -1;
// }
// }
//
// free(seq1);
// free(seq2);
//
// k++;
// }
// }
//
// return 0;
//}