From d99447c12b2505ce96d874cf2e65a63bb1884777 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Fri, 16 Dec 2016 19:39:02 +0100
Subject: [PATCH] C function for LCS alignment of two columns, and optimized
 and fixed line count bug in function to align one column

---
 src/obi_align.c | 466 +++++++++++++++++++++++++++++++++++++++---------
 src/obi_align.h |  61 ++++++-
 2 files changed, 430 insertions(+), 97 deletions(-)

diff --git a/src/obi_align.c b/src/obi_align.c
index a23e27f..97f6d4c 100644
--- a/src/obi_align.c
+++ b/src/obi_align.c
@@ -1,12 +1,12 @@
 /****************************************************************************
- * Sequence alignment functions				                                *
+ * LCS sequence alignment functions				                            *
  ****************************************************************************/
 
 /**
  * @file obi_align.c
  * @author Celine Mercier
  * @date May 4th 2016
- * @brief Functions handling sequence alignments.
+ * @brief Functions handling LCS sequence alignments.
  */
 
 
@@ -407,7 +407,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 	Obiview_p       seq_view = NULL;
 	Obiview_p       output_view = NULL;
 	OBIDMS_column_p iseq_column = NULL;
-	OBIDMS_column_p id_column;
+	OBIDMS_column_p id_column = NULL;
 	OBIDMS_column_p id1_column = NULL;
 	OBIDMS_column_p id2_column = NULL;
 	OBIDMS_column_p seq1_column = NULL;
@@ -451,6 +451,14 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 		return -1;
 	}
 
+	// Check column type
+	if ((iseq_column->header)->returned_data_type != OBI_SEQ)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError: column given to align is not an OBI_SEQ column");
+		return -1;
+	}
+
 	// Get element index of the sequence to align in each line to compute it only once
 	if ((strcmp(seq_elt_name, "") != 0) && (seq_elt_name != NULL))
 	{
@@ -527,21 +535,30 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 		return -1;
 	}
 
-	seq_count = (iseq_column->header)->lines_used;
+	seq_count = (seq_view->infos)->line_count;
 
 	for (i=0; i < (seq_count - 1); i++)
 	{
 		if (i%100 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) seq_count)*100);
 
+		// Get first id idx
+		id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);	// TODO Could there be multiple IDs per line?
+		// Get first sequence and its index
+		seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
+		blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
+		if (blob1 == NULL)
+		{
+			obidebug(1, "\nError retrieving sequences to align");
+			return -1;
+		}
+
 		for (j=i+1; j < seq_count; j++)
 		{
-			blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
-			blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
-			seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
+			// Get second sequence and its index
 			seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
-
-			if ((blob1 == NULL) || (blob2 == NULL))
+			blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
+			if (blob2 == NULL)
 			{
 				obidebug(1, "\nError retrieving sequences to align");
 				return -1;
@@ -569,10 +586,9 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 			}
 
 			if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
-			{	// Print result		// TODO make separate function maybe
+			{	// Print result
 
-				// Get ids idx
-				id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);	// TODO Could there be multiple IDs per line?
+				// Get second id idx
 				id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
 
 				if (print_alignment_result(output_view, k,
@@ -609,82 +625,354 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 }
 
 
-// TODO discuss if 2 input views or 2 columns or both possible
-//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,	// TODO it's implied both seq columns are in the same view but maybe it shouldn't
-//						  Obiview_p score_view, OBIDMS_column_p score_column,
-//						  double threshold, bool normalize, int reference, bool similarity_mode)
-//{
-//	index_t i, j, k;
-//	index_t seq_count_1;
-//	index_t seq_count_2;
-//	char* seq1;
-//	char* seq2;
-//	double score;
-//
-//	k = 0;
-//
-//	if (((seq_column_1->header)->returned_data_type != OBI_SEQ) || ((seq_column_2->header)->returned_data_type != OBI_SEQ))
-//	{
-//		obi_set_errno(OBI_ALIGN_ERROR);
-//		obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
-//		return -1;
-//	}
-//
-//	if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) ||
-//			(!normalize && ((score_column->header)->returned_data_type != OBI_INT)))
-//	{
-//		obi_set_errno(OBI_ALIGN_ERROR);
-//		obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
-//		return -1;
-//	}
-//
-//	seq_count_1 = (seq_column_1->header)->lines_used;
-//	seq_count_2 = (seq_column_2->header)->lines_used;
-//
-//	for (i=0; i < (seq_count_1 - 1); i++)
-//	{
-//		for (j=0; j < seq_count_2; j++)
-//		{
-//			//fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
-//
-//			seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_1, i, 0);
-//			seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_2, j, 0);
-//
-//			if ((seq1 == NULL) || (seq2 == NULL))
-//			{
-//				obidebug(1, "\nError retrieving sequences to align");
-//				return -1;
-//			}
-//
-//			// TODO kmer filter
-//
-//			score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
-//
-//			if (normalize)
-//			{
-//				if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
-//				{
-//					obidebug(1, "\nError writing alignment score in a column");
-//					return -1;
-//				}
-//			}
-//			else
-//			{
-//				if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
-//				{
-//					obidebug(1, "\nError writing alignment score in a column");
-//					return -1;
-//				}
-//			}
-//
-//			free(seq1);
-//			free(seq2);
-//
-//			k++;
-//		}
-//	}
-//
-//	return 0;
-//}
+int obi_lcs_align_two_columns(OBIDMS_p dms,
+							  const char* seq1_view_name,
+							  const char* seq2_view_name,
+							  const char* seq1_column_name,
+							  const char* seq2_column_name,
+							  const char* seq1_elt_name,
+							  const char* seq2_elt_name,
+							  const char* id1_column_name,
+							  const char* id2_column_name,
+					          const char* output_view_name, const char* output_view_comments,
+							  bool print_seq, bool print_count,
+						      double threshold, bool normalize, int reference, bool similarity_mode)
+{
+	index_t         i, j, k;
+	index_t         seq1_count;
+	index_t         seq2_count;
+	index_t         id1_idx, id2_idx;
+	index_t         seq1_idx, seq2_idx;
+	double          score;
+	int             lcs_length;
+	int             ali_length;
+	Kmer_table_p    ktable;
+	Obi_blob_p      blob1;
+	Obi_blob_p   	blob2;
+	int				lcs_min;
+	index_t         seq1_elt_idx;
+	index_t         seq2_elt_idx;
+	bool 			same_indexer;
+
+	Obiview_p       seq1_view = NULL;
+	Obiview_p       seq2_view = NULL;
+	Obiview_p       output_view = NULL;
+	OBIDMS_column_p i_seq1_column = NULL;
+	OBIDMS_column_p i_seq2_column = NULL;
+	OBIDMS_column_p i_id1_column = NULL;
+	OBIDMS_column_p i_id2_column = NULL;
+	OBIDMS_column_p id1_column = NULL;
+	OBIDMS_column_p id2_column = NULL;
+	OBIDMS_column_p seq1_column = NULL;
+	OBIDMS_column_p seq2_column = NULL;
+	//OBIDMS_column_p count1_column = NULL;
+	//OBIDMS_column_p count2_column = NULL;
+	OBIDMS_column_p idx1_column = NULL;
+	OBIDMS_column_p idx2_column = NULL;
+	OBIDMS_column_p lcs_length_column = NULL;
+	OBIDMS_column_p ali_length_column = NULL;
+	OBIDMS_column_p score_column = NULL;
+
+	k = 0;
+
+	// Open the first input view
+	seq1_view = obi_open_view(dms, seq1_view_name);
+	if (seq1_view == NULL)
+	{
+		obidebug(1, "\nError opening the first input view to align");
+		return -1;
+	}
+
+	// Open the second input view. Same as 1st if ""
+	if (strcmp(seq2_view_name, "") == 0)
+		seq2_view = seq1_view;
+	else
+	{
+		seq2_view = obi_open_view(dms, seq2_view_name);
+		if (seq2_view == NULL)
+		{
+			obidebug(1, "\nError opening the second input view to align");
+			return -1;
+		}
+	}
+
+	// Open the first sequence column to align
+	// If a column name wasn't given, open default sequence column
+	if (strcmp(seq1_column_name, "") == 0)
+	{
+		if (strcmp((seq1_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
+			i_seq1_column = obi_view_get_column(seq1_view, NUC_SEQUENCE_COLUMN);
+		else
+		{
+			obi_set_errno(OBI_ALIGN_ERROR);
+			obidebug(1, "\nError: no first column given to align");
+			return -1;
+		}
+	}
+	else
+		i_seq1_column = obi_view_get_column(seq1_view, seq1_column_name);
+	if (i_seq1_column == NULL)
+	{
+		obidebug(1, "\nError getting the first column to align");
+		return -1;
+	}
+
+	// Check column type
+	if ((i_seq1_column->header)->returned_data_type != OBI_SEQ)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError: first column given to align is not an OBI_SEQ column");
+		return -1;
+	}
+
+	// Open the second sequence column to align
+	// If a column name wasn't given, open default sequence column
+	if (strcmp(seq2_column_name, "") == 0)
+	{
+		if (strcmp((seq2_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
+			i_seq2_column = obi_view_get_column(seq2_view, NUC_SEQUENCE_COLUMN);
+		else
+		{
+			obi_set_errno(OBI_ALIGN_ERROR);
+			obidebug(1, "\nError: no second column given to align");
+			return -1;
+		}
+	}
+	else
+		i_seq2_column = obi_view_get_column(seq2_view, seq2_column_name);
+	if (i_seq2_column == NULL)
+	{
+		obidebug(1, "\nError getting the second column to align");
+		return -1;
+	}
+	// Check that the sequence columns are not both the default NUC_SEQ column of the same view
+	if (i_seq1_column == i_seq2_column)
+	{
+		obidebug(1, "\nError: trying to align a column with itself (default NUC_SEQ column of the same view)");
+		return -1;
+	}
+
+	// Check column type
+	if ((i_seq2_column->header)->returned_data_type != OBI_SEQ)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError: second column given to align is not an OBI_SEQ column");
+		return -1;
+	}
+
+	// Get element index of the sequence to align in each line of the first column to compute it only once
+	if ((strcmp(seq1_elt_name, "") != 0) && (seq1_elt_name != NULL))
+	{
+		seq1_elt_idx = obi_column_get_element_index_from_name(i_seq1_column, seq1_elt_name);
+		if (seq1_elt_idx == OBIIdx_NA)
+		{
+			obidebug(1, "\nError getting the sequence index in a column line when aligning");
+			return -1;
+		}
+	}
+	else
+		seq1_elt_idx = 0;
+
+	// Get element index of the sequence to align in each line of the second column to compute it only once
+	if ((strcmp(seq2_elt_name, "") != 0) && (seq2_elt_name != NULL))
+	{
+		seq2_elt_idx = obi_column_get_element_index_from_name(i_seq2_column, seq2_elt_name);
+		if (seq2_elt_idx == OBIIdx_NA)
+		{
+			obidebug(1, "\nError getting the sequence index in a column line when aligning");
+			return -1;
+		}
+	}
+	else
+		seq2_elt_idx = 0;
 
 
+	// Open the first ID column, containing the identifiers of the first sequence to align
+	// If a column name wasn't given, open default ID column
+	if (strcmp(id1_column_name, "") == 0)
+	{
+		if (strcmp((seq1_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
+			i_id1_column = obi_view_get_column(seq1_view, ID_COLUMN);
+		else
+		{
+			obi_set_errno(OBI_ALIGN_ERROR);
+			obidebug(1, "\nError: no first ID column given");
+			return -1;
+		}
+	}
+	else
+		i_id1_column = obi_view_get_column(seq1_view, id1_column_name);
+	if (i_id1_column == NULL)
+	{
+		obidebug(1, "\nError getting the first ID column");
+		return -1;
+	}
+
+	// Open the second ID column, containing the identifiers of the second sequence to align
+	// If a column name wasn't given, open default ID column
+	if (strcmp(id2_column_name, "") == 0)
+	{
+		if (strcmp((seq2_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
+			i_id2_column = obi_view_get_column(seq2_view, ID_COLUMN);
+		else
+		{
+			obi_set_errno(OBI_ALIGN_ERROR);
+			obidebug(1, "\nError: no second ID column given");
+			return -1;
+		}
+	}
+	else
+		i_id2_column = obi_view_get_column(seq2_view, id2_column_name);
+	if (i_id2_column == NULL)
+	{
+		obidebug(1, "\nError getting the second ID column");
+		return -1;
+	}
+
+	// Create the output view
+	output_view = obi_new_view(dms, output_view_name, NULL, NULL, output_view_comments);
+	if (output_view == NULL)
+	{
+		obidebug(1, "\nError creating the output view when aligning");
+		return -1;
+	}
+
+	// Create the output columns
+	if (create_alignment_output_columns(output_view,
+			(i_id1_column->header)->indexer_name, (i_id2_column->header)->indexer_name,
+			(i_seq1_column->header)->indexer_name, (i_seq2_column->header)->indexer_name,
+			print_seq, print_count, normalize, reference, similarity_mode) < 0)
+		return -1;
+	id1_column = obi_view_get_column(output_view, ID1_COLUMN_NAME);
+	id2_column = obi_view_get_column(output_view, ID2_COLUMN_NAME);
+	idx1_column = obi_view_get_column(output_view, IDX1_COLUMN_NAME);
+	idx2_column = obi_view_get_column(output_view, IDX2_COLUMN_NAME);
+    lcs_length_column = obi_view_get_column(output_view, LCS_LENGTH_COLUMN_NAME);
+	if ((reference == ALILEN) && (normalize || !similarity_mode))
+		ali_length_column = obi_view_get_column(output_view, ALI_LENGTH_COLUMN_NAME);
+	score_column = obi_view_get_column(output_view, SCORE_COLUMN_NAME);
+	if (print_seq)
+	{
+		seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME);
+		seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME);
+	}
+//	if (print_count)  // TODO count columns not implemented yet
+//	{
+//		count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME);
+//		count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME);
+//	}
+
+	// Check if the sequence columns share the same indexer (allows for quick checking of sequence equality)
+	if (strcmp((i_seq1_column->header)->indexer_name, (i_seq2_column->header)->indexer_name) == 0)
+		same_indexer = true;
+	else
+		same_indexer = false;
+
+	// Build kmer tables
+	ktable = hash_two_seq_columns(seq1_view, i_seq1_column, seq1_elt_idx, seq2_view, i_seq2_column, seq2_elt_idx);
+	if (ktable == NULL)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError building kmer tables before aligning");
+		return -1;
+	}
+
+	seq1_count = (seq1_view->infos)->line_count;
+	seq2_count = (seq2_view->infos)->line_count;
+
+	for (i=0; i < seq1_count; i++)
+	{
+		if (i%100 == 0)
+			fprintf(stderr,"\rDone : %f %%       ", (i / (float) seq1_count)*100);
+
+		// Get id index of first sequence
+		id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq1_view, i_id1_column, i, 0); // TODO Could there be multiple IDs per line?
+		// Get first sequence and its index
+		seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq1_view, i_seq1_column, i, seq1_elt_idx);
+		blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq1_view, i_seq1_column, i, seq1_elt_idx);
+		if (blob1 == NULL)
+		{
+			obidebug(1, "\nError retrieving sequences to align");
+			return -1;
+		}
+
+		for (j=0; j < seq2_count; j++)
+		{
+			// Get second sequence and its index
+			seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq2_view, i_seq2_column, j, seq2_elt_idx);
+			blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq2_view, i_seq2_column, j, seq2_elt_idx);
+			if (blob2 == NULL)
+			{
+				obidebug(1, "\nError retrieving sequences to align");
+				return -1;
+			}
+
+			// Check if the sequences are identical in a quick way (same index in the same indexer)
+			if (same_indexer && (seq1_idx == seq2_idx))
+			{
+				if (similarity_mode && normalize)
+					score = 1.0;
+				else if (!similarity_mode)
+					score = 0.0;
+				else
+					score = blob1->length_decoded_value;
+			}
+
+			else // the sequences aren't identical
+			{
+				// kmer filter (offset for the index of the kmer table of the 2nd sequence because the kmer tables of the 2 sequence columns are concatenated in one)
+				align_filters(ktable, blob1, blob2, i, seq1_count+j, threshold, normalize, reference, similarity_mode, &score, &lcs_min, false);
+
+				// Compute alignment score
+				if ((threshold == 0) || (score == -1.0))	// no threshold, or filter passed: align
+					score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
+			}
+
+			if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
+			{	// Print result
+
+				// Get second id idx
+				id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq2_view, i_id2_column, j, 0);
+
+				if (print_alignment_result(output_view, k,
+										   idx1_column, idx2_column, i, j,
+										   id1_column, id2_column, id1_idx, id2_idx,
+						                   print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx,
+										   //print_count, count1_column, count2_column, count1, count2,
+										   ali_length_column, ali_length,
+										   lcs_length_column, lcs_length,
+										   score_column, score,
+										   reference, normalize, similarity_mode) < 0)
+					return -1;
+
+				k++;
+			}
+		}
+	}
+
+	// Close views
+	if (seq2_view != seq1_view)
+	{
+		if (obi_close_view(seq2_view) < 0)
+		{
+			obidebug(1, "\nError closing the second input view after aligning");
+			return -1;
+		}
+	}
+	if (obi_close_view(seq1_view) < 0)
+	{
+		obidebug(1, "\nError closing the first input view after aligning");
+		return -1;
+	}
+
+	if (obi_close_view(output_view) < 0)
+	{
+		obidebug(1, "\nError closing the output view after aligning");
+		return -1;
+	}
+
+	free_kmer_tables(ktable, seq1_count + seq2_count);
+
+	return 0;
+}
+
diff --git a/src/obi_align.h b/src/obi_align.h
index c0d823c..68048bd 100644
--- a/src/obi_align.h
+++ b/src/obi_align.h
@@ -1,12 +1,12 @@
 /****************************************************************************
- * Sequence alignment functions header file	                                *
+ * LCS sequence alignment functions header file	                            *
  ****************************************************************************/
 
 /**
  * @file obi_align.h
  * @author Celine Mercier
  * @date May 11th 2016
- * @brief Header file for the functions handling the alignment of DNA sequences.
+ * @brief Header file for the functions handling the LCS alignment of DNA sequences.
  */
 
 
@@ -55,7 +55,7 @@
 
 
 /**
- * @brief Aligns a NUC_SEQ column with itself.
+ * @brief Aligns an OBI_SEQ column with itself.
  *
  * Note: The columns where the results are written are automatically named and created.
  *
@@ -96,14 +96,59 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
 
 
 /**
- * @brief
+ * @brief Aligns two OBI_SEQ columns.
  *
- * TODO
+ * The columns must belong to the same OBIDMS, but can belong to different views.
  *
+ * Note: The columns where the results are written are automatically named and created.
+ *
+ * @param dms A pointer on an OBIDMS.
+ * @param seq1_view_name The name of the view where the first column to align is.
+ * @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one).
+ * @param seq1_column_name The name of the first OBI_SEQ column in the input view to align.
+ *                         If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
+ * @param seq2_column_name The name of the second OBI_SEQ column in the input view to align.
+ *                         If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
+ * @param seq1_elt_name The name of the element in the first column corresponding to the sequence to align, if the column has multiple
+ *                      elements per line.
+ * @param seq2_elt_name The name of the element in the second column corresponding to the sequence to align, if the column has multiple
+ *                      elements per line.
+ * @param id1_column_name The name of the column in the first input view containing the identifiers of the first sequence to align.
+ *                        If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is aligned.
+ * @param id2_column_name The name of the column in the second input view containing the identifiers of the second sequence to align.
+ *                        If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is aligned.
+ * @param output_view_name The name of the output view where the results should be written (should not already exist).
+ * @param output_view_comments The comments that should be associated with the output view.
+ * @param print_seq A boolean indicating whether the aligned sequences should be copied in the output view.
+ * @param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view.
+ * @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
+ * 					for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
+ * 					e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
+ * 					the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
+ *                  it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
+ * @param normalize Whether the score should be normalized with the reference sequence length.
+ * @param reference The reference length. 0: The alignement length; 1: The longest sequence's length; 2: The shortest sequence's length.
+ * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
+ *
+ * @returns A value indicating the success of the operation.
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ *
+ * @since December 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,
-//						  Obiview_p score_view, OBIDMS_column_p score_column,
-//						  double threshold, bool normalize, int reference, bool similarity_mode);
+int obi_lcs_align_two_columns(OBIDMS_p dms,
+							  const char* seq1_view_name,
+							  const char* seq2_view_name,
+							  const char* seq1_column_name,
+							  const char* seq2_column_name,
+							  const char* seq1_elt_name,
+							  const char* seq2_elt_name,
+							  const char* id1_column_name,
+							  const char* id2_column_name,
+					          const char* output_view_name, const char* output_view_comments,
+							  bool print_seq, bool print_count,
+						      double threshold, bool normalize, int reference, bool similarity_mode);
 
 
 #endif /* OBI_ALIGN_H_ */