From 9b24818fe2b2381eddb39df607a4eba25377a5ad Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Tue, 13 Dec 2016 17:18:12 +0100
Subject: [PATCH 01/22] Refactored alignment code for minimum redundancy
 between the function that aligns 1 column and the function that aligns 2
 columns

---
 src/obi_align.c | 546 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 368 insertions(+), 178 deletions(-)

diff --git a/src/obi_align.c b/src/obi_align.c
index 390ff60..a23e27f 100644
--- a/src/obi_align.c
+++ b/src/obi_align.c
@@ -31,6 +31,360 @@
 // use openMP pragmas
 
 
+/**************************************************************************
+ *
+ * D E C L A R A T I O N   O F   T H E   P R I V A T E   F U N C T I O N S
+ *
+ **************************************************************************/
+
+
+/**
+ * @brief Internal function creating the columns where the alignment results are written.
+ *
+ * @param output_view A pointer on the writable view where the columns should be created.
+ * @param id1_indexer_name The name of the indexer where the id of the 1st sequence aligned is indexed.
+ * @param id2_indexer_name The name of the indexer where the id of the 2nd sequence aligned is indexed.
+ * @param seq1_indexer_name The name of the indexer where the 1st sequence aligned is indexed (needed only if print_seq is True).
+ * @param seq2_indexer_name The name of the indexer where the 2nd sequence aligned is indexed (needed only if print_seq is True).
+ * @param print_seq A boolean indicating whether the aligned sequences should be copied in the output view.
+ * @param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view (count columns are not implemented yet, so this is currently not acted upon).
+ * @param normalize Whether the score should be normalized with the reference sequence length.
+ * @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
+ * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
+ *
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ *
+ * @since December 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int create_alignment_output_columns(Obiview_p output_view,
+										   const char* id1_indexer_name,
+										   const char* id2_indexer_name,
+										   const char* seq1_indexer_name,
+										   const char* seq2_indexer_name,
+		                                   bool print_seq, bool print_count,
+										   bool normalize, int reference, bool similarity_mode);
+
+
+/**
+ * @brief Internal function printing the result of one alignment to the output view.
+ *
+ * @param output_view A pointer on the writable view where the result should be written.
+ * @param line The line in the output view where the result should be written.
+ * @param idx1_column A pointer on the column where the index referring to the line of the first sequence aligned in the input view should be written.
+ * @param idx2_column A pointer on the column where the index referring to the line of the second sequence aligned in the input view should be written.
+ * @param idx1 The index referring to the line of the first sequence aligned in the input view.
+ * @param idx2 The index referring to the line of the second sequence aligned in the input view.
+ * @param id1_column A pointer on the column where the identifier of the first sequence aligned should be written.
+ * @param id2_column A pointer on the column where the identifier of the second sequence aligned should be written.
+ * @param id1_idx The index of the identifier of the first sequence aligned.
+ * @param id2_idx The index of the identifier of the second sequence aligned.
+ * @param print_seq A boolean indicating whether the aligned sequences should be copied in the output view.
+ * @param seq1_column A pointer on the column where the first sequence aligned should be written.
+ * @param seq2_column A pointer on the column where the second sequence aligned should be written.
+ * @param seq1_idx The index of the sequence of the first sequence aligned.
+ * @param seq2_idx The index of the sequence of the second sequence aligned.
+ * @param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view.		// Count columns not implemented yet: the five count parameters are commented out in the signature
+ * @param count1_column A pointer on the column where the count of the first sequence aligned should be written.
+ * @param count2_column A pointer on the column where the count of the second sequence aligned should be written.
+ * @param count1 The count of the first sequence aligned.
+ * @param count2 The count of the second sequence aligned.
+ * @param ali_length_column A pointer on the column where the alignment length should be written.
+ * @param ali_length The alignment length.
+ * @param lcs_length_column A pointer on the column where the LCS length should be written.
+ * @param lcs_length The LCS length.
+ * @param score_column A pointer on the column where the score should be written.
+ * @param score The alignment score.
+ * @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
+ * @param normalize Whether the score should be normalized with the reference sequence length.
+ * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
+ *
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ *
+ * @since December 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int print_alignment_result(Obiview_p output_view,
+								   index_t line,
+								   OBIDMS_column_p idx1_column,
+								   OBIDMS_column_p idx2_column,
+								   index_t idx1,
+								   index_t idx2,
+								   OBIDMS_column_p id1_column,
+								   OBIDMS_column_p id2_column,
+								   index_t id1_idx,
+								   index_t id2_idx,
+								   bool print_seq,
+								   OBIDMS_column_p seq1_column,
+								   OBIDMS_column_p seq2_column,
+								   index_t seq1_idx,
+								   index_t seq2_idx,
+//								   bool print_count,
+//								   OBIDMS_column_p count1_column,
+//								   OBIDMS_column_p count2_column,
+//								   int count1,
+//								   int count2,
+								   OBIDMS_column_p ali_length_column,
+								   int ali_length,
+								   OBIDMS_column_p lcs_length_column,
+								   int lcs_length,
+								   OBIDMS_column_p score_column,
+								   double score,
+								   int reference,
+								   bool normalize,
+								   bool similarity_mode);
+
+
+
+/************************************************************************
+ *
+ * D E F I N I T I O N   O F   T H E   P R I V A T E   F U N C T I O N S
+ *
+ ************************************************************************/
+
+
+static int create_alignment_output_columns(Obiview_p output_view,
+										   const char* id1_indexer_name,
+										   const char* id2_indexer_name,
+										   const char* seq1_indexer_name,
+										   const char* seq2_indexer_name,
+		                                   bool print_seq, bool print_count,
+										   bool normalize, int reference, bool similarity_mode)
+{	// Adds each alignment result column to output_view, returning -1 at the first failed creation; print_count is currently unused (count columns are commented out below)
+	// Create the column for the ids of the 1st sequence aligned (string column bound to id1_indexer_name)
+	if (obi_view_add_column(output_view, ID1_COLUMN_NAME, -1, ID1_COLUMN_NAME, OBI_STR, 0, 1, NULL, id1_indexer_name, NULL, -1, ID1_COLUMN_COMMENTS, true) < 0)
+	{
+		obidebug(1, "\nError creating the first column for the sequence ids when aligning");
+		return -1;
+	}
+
+	// Create the column for the ids of the 2nd sequence aligned (string column bound to id2_indexer_name)
+	if (obi_view_add_column(output_view, ID2_COLUMN_NAME, -1, ID2_COLUMN_NAME, OBI_STR, 0, 1, NULL, id2_indexer_name, NULL, -1, ID2_COLUMN_COMMENTS, true) < 0)
+	{
+		obidebug(1, "\nError creating the second column for the sequence ids when aligning");
+		return -1;
+	}
+
+	// Create the column for the index (in the input view) of the first sequences aligned
+	if (obi_view_add_column(output_view, IDX1_COLUMN_NAME, -1, IDX1_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, IDX1_COLUMN_COMMENTS, true) < 0)
+	{
+		obidebug(1, "\nError creating the first column for the sequence indices when aligning");
+		return -1;
+	}
+
+	// Create the column for the index (in the input view) of the second sequences aligned
+	if (obi_view_add_column(output_view, IDX2_COLUMN_NAME, -1, IDX2_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, IDX2_COLUMN_COMMENTS, true) < 0)
+	{
+		obidebug(1, "\nError creating the second column for the sequence indices when aligning");
+		return -1;
+	}
+
+	// Create the column for the LCS length (always created)
+	if (obi_view_add_column(output_view, LCS_LENGTH_COLUMN_NAME, -1, LCS_LENGTH_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, LCS_LENGTH_COLUMN_COMMENTS, true) < 0)
+	{
+		obidebug(1, "\nError creating the column for the LCS length when aligning");
+		return -1;
+	}
+
+	// Create the column for the alignment length only when reference is ALILEN and the score is normalized or expressed as a distance
+	if ((reference == ALILEN) && (normalize || !similarity_mode))
+	{
+		if (obi_view_add_column(output_view, ALI_LENGTH_COLUMN_NAME, -1, ALI_LENGTH_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, ALI_LENGTH_COLUMN_COMMENTS, true) < 0)
+		{
+			obidebug(1, "\nError creating the column for the alignment length when aligning");
+			return -1;
+		}
+	}
+	// Create the column for the alignment score: OBI_FLOAT when normalized, OBI_INT otherwise
+	if (normalize)
+	{
+		if (obi_view_add_column(output_view, SCORE_COLUMN_NAME, -1, SCORE_COLUMN_NAME, OBI_FLOAT, 0, 1, NULL, NULL, NULL, -1, SCORE_COLUMN_NAME, true) < 0)	// NOTE(review): SCORE_COLUMN_NAME is passed where the other calls pass a *_COLUMN_COMMENTS macro - confirm this is intended
+		{
+			obidebug(1, "\nError creating the column for the score when aligning");
+			return -1;
+		}
+	}
+	else
+	{
+		if (obi_view_add_column(output_view, SCORE_COLUMN_NAME, -1, SCORE_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, SCORE_COLUMN_NAME, true) < 0)	// NOTE(review): same comments argument as in the normalized branch - confirm
+		{
+			obidebug(1, "\nError creating the column for the score when aligning");
+			return -1;
+		}
+	}
+
+	if (print_seq)
+	{
+		// Create the column for the first sequences aligned (sequence column bound to seq1_indexer_name)
+		if (obi_view_add_column(output_view, SEQ1_COLUMN_NAME, -1, SEQ1_COLUMN_NAME, OBI_SEQ, 0, 1, NULL, seq1_indexer_name, NULL, -1, SEQ1_COLUMN_COMMENTS, true) < 0)
+		{
+			obidebug(1, "\nError creating the first column for the sequences when aligning");
+			return -1;
+		}
+
+		// Create the column for the second sequences aligned (sequence column bound to seq2_indexer_name)
+		if (obi_view_add_column(output_view, SEQ2_COLUMN_NAME, -1, SEQ2_COLUMN_NAME, OBI_SEQ, 0, 1, NULL, seq2_indexer_name, NULL, -1, SEQ2_COLUMN_COMMENTS, true) < 0)
+		{
+			obidebug(1, "\nError creating the second column for the sequences when aligning");
+			return -1;
+		}
+	}
+//	if (print_count)  // TODO count columns not implemented yet
+//	{
+//		// Create the column for the count of the first sequences aligned
+//		if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, COUNT1_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0)
+//		{
+//			obidebug(1, "\nError creating the first column for the sequence counts when aligning");
+//			return -1;
+//		}
+//
+//		// Create the column for the count of the second sequences aligned
+//		if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, COUNT2_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0)
+//		{
+//			obidebug(1, "\nError creating the second column for the sequence counts when aligning");
+//			return -1;
+//		}
+//	}
+
+	return 0;
+}
+
+
+static int print_alignment_result(Obiview_p output_view,
+								   index_t line,
+								   OBIDMS_column_p idx1_column,
+								   OBIDMS_column_p idx2_column,
+								   index_t idx1,
+								   index_t idx2,
+								   OBIDMS_column_p id1_column,
+								   OBIDMS_column_p id2_column,
+								   index_t id1_idx,
+								   index_t id2_idx,
+								   bool print_seq,
+								   OBIDMS_column_p seq1_column,
+								   OBIDMS_column_p seq2_column,
+								   index_t seq1_idx,
+								   index_t seq2_idx,
+//								   bool print_count,
+//								   OBIDMS_column_p count1_column,
+//								   OBIDMS_column_p count2_column,
+//								   int count1,
+//								   int count2,
+								   OBIDMS_column_p ali_length_column,
+								   int ali_length,
+								   OBIDMS_column_p lcs_length_column,
+								   int lcs_length,
+								   OBIDMS_column_p score_column,
+								   double score,
+								   int reference,
+								   bool normalize,
+								   bool similarity_mode)
+{	// Writes one alignment result into row `line` (element 0) of each output column; returns -1 at the first failed write
+	// Write line indices of the input view in the output view (to easily refer to the input sequences from the output view)
+	if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, idx1_column, line, 0, idx1) < 0)
+	{
+		obidebug(1, "\nError writing idx1 in a column");
+		return -1;
+	}
+	if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, idx2_column, line, 0, idx2) < 0)
+	{
+		obidebug(1, "\nError writing idx2 in a column");
+		return -1;
+	}
+
+	// Write the ids in the output view, stored as the index values id1_idx/id2_idx rather than as strings
+	if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, id1_column, line, 0, id1_idx) < 0)
+	{
+		obidebug(1, "\nError writing id1 in a column");
+		return -1;
+	}
+	if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, id2_column, line, 0, id2_idx) < 0)
+	{
+		obidebug(1, "\nError writing id2 in a column");
+		return -1;
+	}
+
+	// Write the sequences (as the index values seq1_idx/seq2_idx) only when print_seq is set; seq1_column/seq2_column are not touched otherwise
+	if (print_seq)
+	{
+		if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, seq1_column, line, 0, seq1_idx) < 0)
+		{
+			obidebug(1, "\nError writing seq1 in a column");
+			return -1;
+		}
+
+		if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, seq2_column, line, 0, seq2_idx) < 0)
+		{
+			obidebug(1, "\nError writing seq2 in a column");
+			return -1;
+		}
+	}
+
+//				// Write the counts if needed	// TODO count columns not implemented yet
+//				if (print_count)
+//				{
+//					if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0)
+//					{
+//						obidebug(1, "\nError writing count1 in a column");
+//						return -1;
+//					}
+//
+//					if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0)
+//					{
+//						obidebug(1, "\nError writing count2 in a column");
+//						return -1;
+//					}
+//				}
+
+	// Write the alignment length if it was computed (same condition as the one under which create_alignment_output_columns creates that column)
+	if ((reference == ALILEN) && (normalize || !similarity_mode))
+	{
+		if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, ali_length_column, line, 0, ali_length) < 0)
+		{
+			obidebug(1, "\nError writing alignment length in a column");
+			return -1;
+		}
+	}
+
+	// Write the LCS length (always written)
+	if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, lcs_length_column, line, 0, lcs_length) < 0)
+	{
+		obidebug(1, "\nError writing LCS length in a column");
+		return -1;
+	}
+
+	// Write the score: cast to obifloat_t when normalized, cast to obiint_t otherwise (matches the column type chosen at creation)
+	if (normalize)
+	{
+		if (obi_set_float_with_elt_idx_and_col_p_in_view(output_view, score_column, line, 0, (obifloat_t) score) < 0)
+		{
+			obidebug(1, "\nError writing alignment score in a column");
+			return -1;
+		}
+	}
+	else
+	{
+		if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, score_column, line, 0, (obiint_t) score) < 0)
+		{
+			obidebug(1, "\nError writing alignment score in a column");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+
+
+/**********************************************************************
+ *
+ * D E F I N I T I O N   O F   T H E   P U B L I C   F U N C T I O N S
+ *
+ **********************************************************************/
+
+
 int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char* seq_column_name, const char* seq_elt_name,
 							 const char* id_column_name,
 					         const char* output_view_name, const char* output_view_comments,
@@ -140,114 +494,30 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 	}
 
 	// Create the output columns
-
-	// Create the column for the ids of the 1st sequence aligned
-	if (obi_view_add_column(output_view, ID1_COLUMN_NAME, -1, ID1_COLUMN_NAME, OBI_STR, 0, 1, NULL, (id_column->header)->indexer_name, NULL, -1, ID1_COLUMN_COMMENTS, true) < 0)
-	{
-		obidebug(1, "\nError creating the first column for the sequence ids when aligning");
+	if (create_alignment_output_columns(output_view,
+			(id_column->header)->indexer_name, (id_column->header)->indexer_name,
+			(iseq_column->header)->indexer_name, (iseq_column->header)->indexer_name,
+			print_seq, print_count, normalize, reference, similarity_mode) < 0)
 		return -1;
-	}
 	id1_column = obi_view_get_column(output_view, ID1_COLUMN_NAME);
-
-	// Create the column for the ids of the 2nd sequence aligned
-	if (obi_view_add_column(output_view, ID2_COLUMN_NAME, -1, ID2_COLUMN_NAME, OBI_STR, 0, 1, NULL, (id_column->header)->indexer_name, NULL, -1, ID2_COLUMN_COMMENTS, true) < 0)
-	{
-		obidebug(1, "\nError creating the second column for the sequence ids when aligning");
-		return -1;
-	}
 	id2_column = obi_view_get_column(output_view, ID2_COLUMN_NAME);
-
-	// Create the column for the index (in the input view) of the first sequences aligned
-	if (obi_view_add_column(output_view, IDX1_COLUMN_NAME, -1, IDX1_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, IDX1_COLUMN_COMMENTS, true) < 0)
-	{
-		obidebug(1, "\nError creating the first column for the sequence indices when aligning");
-		return -1;
-	}
 	idx1_column = obi_view_get_column(output_view, IDX1_COLUMN_NAME);
-
-	// Create the column for the index (in the input view) of the second sequences aligned
-	if (obi_view_add_column(output_view, IDX2_COLUMN_NAME, -1, IDX2_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, IDX2_COLUMN_COMMENTS, true) < 0)
-	{
-		obidebug(1, "\nError creating the second column for the sequence indices when aligning");
-		return -1;
-	}
 	idx2_column = obi_view_get_column(output_view, IDX2_COLUMN_NAME);
-
-	// Create the column for the LCS length
-	if (obi_view_add_column(output_view, LCS_LENGTH_COLUMN_NAME, -1, LCS_LENGTH_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, LCS_LENGTH_COLUMN_COMMENTS, true) < 0)
-	{
-		obidebug(1, "\nError creating the column for the LCS length when aligning");
-		return -1;
-	}
-	lcs_length_column = obi_view_get_column(output_view, LCS_LENGTH_COLUMN_NAME);
-
-	// Create the column for the alignment length if it is computed
+    lcs_length_column = obi_view_get_column(output_view, LCS_LENGTH_COLUMN_NAME);
 	if ((reference == ALILEN) && (normalize || !similarity_mode))
-	{
-		if (obi_view_add_column(output_view, ALI_LENGTH_COLUMN_NAME, -1, ALI_LENGTH_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, ALI_LENGTH_COLUMN_COMMENTS, true) < 0)
-		{
-			obidebug(1, "\nError creating the column for the alignment length when aligning");
-			return -1;
-		}
 		ali_length_column = obi_view_get_column(output_view, ALI_LENGTH_COLUMN_NAME);
-	}
-	// Create the column for the alignment score
-	if (normalize)
-	{
-		if (obi_view_add_column(output_view, SCORE_COLUMN_NAME, -1, SCORE_COLUMN_NAME, OBI_FLOAT, 0, 1, NULL, NULL, NULL, -1, SCORE_COLUMN_NAME, true) < 0)
-		{
-			obidebug(1, "\nError creating the column for the score when aligning");
-			return -1;
-		}
-	}
-	else
-	{
-		if (obi_view_add_column(output_view, SCORE_COLUMN_NAME, -1, SCORE_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, SCORE_COLUMN_NAME, true) < 0)
-		{
-			obidebug(1, "\nError creating the column for the score when aligning");
-			return -1;
-		}
-	}
 	score_column = obi_view_get_column(output_view, SCORE_COLUMN_NAME);
-
 	if (print_seq)
 	{
-		// Create the column for the first sequences aligned
-		if (obi_view_add_column(output_view, SEQ1_COLUMN_NAME, -1, SEQ1_COLUMN_NAME, OBI_SEQ, 0, 1, NULL, (iseq_column->header)->indexer_name, NULL, -1, SEQ1_COLUMN_COMMENTS, true) < 0)
-		{
-			obidebug(1, "\nError creating the first column for the sequences when aligning");
-			return -1;
-		}
 		seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME);
-
-		// Create the column for the second sequences aligned
-		if (obi_view_add_column(output_view, SEQ2_COLUMN_NAME, -1, SEQ2_COLUMN_NAME, OBI_SEQ, 0, 1, NULL, (iseq_column->header)->indexer_name, NULL, -1, SEQ2_COLUMN_COMMENTS, true) < 0)
-		{
-			obidebug(1, "\nError creating the second column for the sequences when aligning");
-			return -1;
-		}
 		seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME);
 	}
 //	if (print_count)  // TODO count columns not implemented yet
 //	{
-//		// Create the column for the count of the first sequences aligned
-//		if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, COUNT1_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0)
-//		{
-//			obidebug(1, "\nError creating the first column for the sequence counts when aligning");
-//			return -1;
-//		}
 //		count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME);
-//
-//		// Create the column for the count of the second sequences aligned
-//		if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, COUNT2_COLUMN_NAME, OBI_INT, 0, 1, NULL, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0)
-//		{
-//			obidebug(1, "\nError creating the second column for the sequence counts when aligning");
-//			return -1;
-//		}
 //		count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME);
 //	}
 
-
 	// Build kmer tables
 	ktable = hash_seq_column(seq_view, iseq_column, seq_elt_idx);
 	if (ktable == NULL)
@@ -301,100 +571,20 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 			if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
 			{	// Print result		// TODO make separate function maybe
 
-				// Write line indices of the input view in the output view (to easily refer to the input sequences from the output view)
-				if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, idx1_column, k, 0, i) < 0)
-				{
-					obidebug(1, "\nError writing idx1 in a column");
-					return -1;
-				}
-				if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, idx2_column, k, 0, j) < 0)
-				{
-					obidebug(1, "\nError writing idx2 in a column");
-					return -1;
-				}
-
 				// Get ids idx
 				id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);	// TODO Could there be multiple IDs per line?
 				id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
 
-				// Write ids in output view
-				if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, id1_column, k, 0, id1_idx) < 0)
-				{
-					obidebug(1, "\nError writing id1 in a column");
+				if (print_alignment_result(output_view, k,
+										   idx1_column, idx2_column, i, j,
+										   id1_column, id2_column, id1_idx, id2_idx,
+						                   print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx,
+										   //print_count, count1_column, count2_column, count1, count2,
+										   ali_length_column, ali_length,
+										   lcs_length_column, lcs_length,
+										   score_column, score,
+										   reference, normalize, similarity_mode) < 0)
 					return -1;
-				}
-				if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, id2_column, k, 0, id2_idx) < 0)
-				{
-					obidebug(1, "\nError writing id2 in a column");
-					return -1;
-				}
-
-				// Write the sequences if needed
-				if (print_seq)
-				{
-					if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, seq1_column, k, 0, seq1_idx) < 0)
-					{
-						obidebug(1, "\nError writing seq1 in a column");
-						return -1;
-					}
-
-					if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, seq2_column, k, 0, seq2_idx) < 0)
-					{
-						obidebug(1, "\nError writing seq2 in a column");
-						return -1;
-					}
-				}
-
-//				// Write the counts if needed	// TODO count columns not implemented yet
-//				if (print_count)
-//				{
-//					if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count1_column, k, 0, count1) < 0)
-//					{
-//						obidebug(1, "\nError writing count1 in a column");
-//						return -1;
-//					}
-//
-//					if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count2_column, k, 0, count2) < 0)
-//					{
-//						obidebug(1, "\nError writing count2 in a column");
-//						return -1;
-//					}
-//				}
-
-				// Write the alignment length if it was computed
-				if ((reference == ALILEN) && (normalize || !similarity_mode))
-				{
-					if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, ali_length_column, k, 0, ali_length) < 0)
-					{
-						obidebug(1, "\nError writing alignment length in a column");
-						return -1;
-					}
-				}
-
-				// Write the LCS length
-				if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, lcs_length_column, k, 0, lcs_length) < 0)
-				{
-					obidebug(1, "\nError writing LCS length in a column");
-					return -1;
-				}
-
-				// Write score
-				if (normalize)
-				{
-					if (obi_set_float_with_elt_idx_and_col_p_in_view(output_view, score_column, k, 0, (obifloat_t) score) < 0)
-					{
-						obidebug(1, "\nError writing alignment score in a column");
-						return -1;
-					}
-				}
-				else
-				{
-					if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, score_column, k, 0, (obiint_t) score) < 0)
-					{
-						obidebug(1, "\nError writing alignment score in a column");
-						return -1;
-					}
-				}
 
 				k++;
 			}

From 191c83aafc5cbac301269323dbc41fa22f2455a7 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Thu, 15 Dec 2016 15:28:34 +0100
Subject: [PATCH 02/22] Added missing *.cfiles

---
 python/obitools3/commands/lcs.cfiles          | 65 +++++++++++++++++++
 python/obitools3/obidms/capi/obialign.cfiles  | 65 +++++++++++++++++++
 python/obitools3/obidms/capi/obidms.cfiles    | 65 +++++++++++++++++++
 .../obitools3/obidms/capi/obidmscolumn.cfiles | 65 +++++++++++++++++++
 python/obitools3/obidms/capi/obierrno.cfiles  | 65 +++++++++++++++++++
 .../obitools3/obidms/capi/obitaxonomy.cfiles  | 65 +++++++++++++++++++
 python/obitools3/obidms/capi/obitypes.cfiles  | 65 +++++++++++++++++++
 python/obitools3/obidms/capi/obiutils.cfiles  | 65 +++++++++++++++++++
 python/obitools3/obidms/capi/obiview.cfiles   | 65 +++++++++++++++++++
 9 files changed, 585 insertions(+)
 create mode 100644 python/obitools3/commands/lcs.cfiles
 create mode 100644 python/obitools3/obidms/capi/obialign.cfiles
 create mode 100644 python/obitools3/obidms/capi/obidms.cfiles
 create mode 100644 python/obitools3/obidms/capi/obidmscolumn.cfiles
 create mode 100644 python/obitools3/obidms/capi/obierrno.cfiles
 create mode 100644 python/obitools3/obidms/capi/obitaxonomy.cfiles
 create mode 100644 python/obitools3/obidms/capi/obitypes.cfiles
 create mode 100644 python/obitools3/obidms/capi/obiutils.cfiles
 create mode 100644 python/obitools3/obidms/capi/obiview.cfiles

diff --git a/python/obitools3/commands/lcs.cfiles b/python/obitools3/commands/lcs.cfiles
new file mode 100644
index 0000000..84e0436
--- /dev/null
+++ b/python/obitools3/commands/lcs.cfiles
@@ -0,0 +1,65 @@
+../../../src/bloom.h
+../../../src/bloom.c
+../../../src/char_str_indexer.h
+../../../src/char_str_indexer.c
+../../../src/crc64.h
+../../../src/crc64.c
+../../../src/dna_seq_indexer.h
+../../../src/dna_seq_indexer.c
+../../../src/encode.h
+../../../src/encode.c
+../../../src/hashtable.h
+../../../src/hashtable.c
+../../../src/murmurhash2.h
+../../../src/murmurhash2.c
+../../../src/obi_align.h
+../../../src/obi_align.c
+../../../src/obiavl.h
+../../../src/obiavl.c
+../../../src/obiblob_indexer.h
+../../../src/obiblob_indexer.c
+../../../src/obiblob.h
+../../../src/obiblob.c
+../../../src/obidebug.h
+../../../src/obidms_taxonomy.h
+../../../src/obidms_taxonomy.c
+../../../src/obidms.h
+../../../src/obidms.c
+../../../src/obidmscolumn_blob.c
+../../../src/obidmscolumn_blob.h
+../../../src/obidmscolumn_bool.c
+../../../src/obidmscolumn_bool.h
+../../../src/obidmscolumn_char.c
+../../../src/obidmscolumn_char.h
+../../../src/obidmscolumn_float.c
+../../../src/obidmscolumn_float.h
+../../../src/obidmscolumn_idx.h
+../../../src/obidmscolumn_idx.c
+../../../src/obidmscolumn_int.c
+../../../src/obidmscolumn_int.h
+../../../src/obidmscolumn_qual.h
+../../../src/obidmscolumn_qual.c
+../../../src/obidmscolumn_seq.c
+../../../src/obidmscolumn_seq.h
+../../../src/obidmscolumn_str.c
+../../../src/obidmscolumn_str.h
+../../../src/obidmscolumn.h
+../../../src/obidmscolumn.c
+../../../src/obidmscolumndir.h
+../../../src/obidmscolumndir.c
+../../../src/obierrno.h
+../../../src/obierrno.c
+../../../src/obilittlebigman.h
+../../../src/obilittlebigman.c
+../../../src/obitypes.h
+../../../src/obitypes.c
+../../../src/obiview.h
+../../../src/obiview.c
+../../../src/sse_banded_LCS_alignment.h
+../../../src/sse_banded_LCS_alignment.c
+../../../src/uint8_indexer.h
+../../../src/uint8_indexer.c
+../../../src/upperband.h
+../../../src/upperband.c
+../../../src/utils.h
+../../../src/utils.c
diff --git a/python/obitools3/obidms/capi/obialign.cfiles b/python/obitools3/obidms/capi/obialign.cfiles
new file mode 100644
index 0000000..3bbdbcb
--- /dev/null
+++ b/python/obitools3/obidms/capi/obialign.cfiles
@@ -0,0 +1,65 @@
+../../../../src/bloom.h
+../../../../src/bloom.c
+../../../../src/char_str_indexer.h
+../../../../src/char_str_indexer.c
+../../../../src/crc64.h
+../../../../src/crc64.c
+../../../../src/dna_seq_indexer.h
+../../../../src/dna_seq_indexer.c
+../../../../src/encode.h
+../../../../src/encode.c
+../../../../src/hashtable.h
+../../../../src/hashtable.c
+../../../../src/murmurhash2.h
+../../../../src/murmurhash2.c
+../../../../src/obi_align.h
+../../../../src/obi_align.c
+../../../../src/obiavl.h
+../../../../src/obiavl.c
+../../../../src/obiblob_indexer.h
+../../../../src/obiblob_indexer.c
+../../../../src/obiblob.h
+../../../../src/obiblob.c
+../../../../src/obidebug.h
+../../../../src/obidms_taxonomy.h
+../../../../src/obidms_taxonomy.c
+../../../../src/obidms.h
+../../../../src/obidms.c
+../../../../src/obidmscolumn_blob.c
+../../../../src/obidmscolumn_blob.h
+../../../../src/obidmscolumn_bool.c
+../../../../src/obidmscolumn_bool.h
+../../../../src/obidmscolumn_char.c
+../../../../src/obidmscolumn_char.h
+../../../../src/obidmscolumn_float.c
+../../../../src/obidmscolumn_float.h
+../../../../src/obidmscolumn_idx.h
+../../../../src/obidmscolumn_idx.c
+../../../../src/obidmscolumn_int.c
+../../../../src/obidmscolumn_int.h
+../../../../src/obidmscolumn_qual.h
+../../../../src/obidmscolumn_qual.c
+../../../../src/obidmscolumn_seq.c
+../../../../src/obidmscolumn_seq.h
+../../../../src/obidmscolumn_str.c
+../../../../src/obidmscolumn_str.h
+../../../../src/obidmscolumn.h
+../../../../src/obidmscolumn.c
+../../../../src/obidmscolumndir.h
+../../../../src/obidmscolumndir.c
+../../../../src/obierrno.h
+../../../../src/obierrno.c
+../../../../src/obilittlebigman.h
+../../../../src/obilittlebigman.c
+../../../../src/obitypes.h
+../../../../src/obitypes.c
+../../../../src/obiview.h
+../../../../src/obiview.c
+../../../../src/sse_banded_LCS_alignment.h
+../../../../src/sse_banded_LCS_alignment.c
+../../../../src/uint8_indexer.h
+../../../../src/uint8_indexer.c
+../../../../src/upperband.h
+../../../../src/upperband.c
+../../../../src/utils.h
+../../../../src/utils.c
diff --git a/python/obitools3/obidms/capi/obidms.cfiles b/python/obitools3/obidms/capi/obidms.cfiles
new file mode 100644
index 0000000..3bbdbcb
--- /dev/null
+++ b/python/obitools3/obidms/capi/obidms.cfiles
@@ -0,0 +1,65 @@
+../../../../src/bloom.h
+../../../../src/bloom.c
+../../../../src/char_str_indexer.h
+../../../../src/char_str_indexer.c
+../../../../src/crc64.h
+../../../../src/crc64.c
+../../../../src/dna_seq_indexer.h
+../../../../src/dna_seq_indexer.c
+../../../../src/encode.h
+../../../../src/encode.c
+../../../../src/hashtable.h
+../../../../src/hashtable.c
+../../../../src/murmurhash2.h
+../../../../src/murmurhash2.c
+../../../../src/obi_align.h
+../../../../src/obi_align.c
+../../../../src/obiavl.h
+../../../../src/obiavl.c
+../../../../src/obiblob_indexer.h
+../../../../src/obiblob_indexer.c
+../../../../src/obiblob.h
+../../../../src/obiblob.c
+../../../../src/obidebug.h
+../../../../src/obidms_taxonomy.h
+../../../../src/obidms_taxonomy.c
+../../../../src/obidms.h
+../../../../src/obidms.c
+../../../../src/obidmscolumn_blob.c
+../../../../src/obidmscolumn_blob.h
+../../../../src/obidmscolumn_bool.c
+../../../../src/obidmscolumn_bool.h
+../../../../src/obidmscolumn_char.c
+../../../../src/obidmscolumn_char.h
+../../../../src/obidmscolumn_float.c
+../../../../src/obidmscolumn_float.h
+../../../../src/obidmscolumn_idx.h
+../../../../src/obidmscolumn_idx.c
+../../../../src/obidmscolumn_int.c
+../../../../src/obidmscolumn_int.h
+../../../../src/obidmscolumn_qual.h
+../../../../src/obidmscolumn_qual.c
+../../../../src/obidmscolumn_seq.c
+../../../../src/obidmscolumn_seq.h
+../../../../src/obidmscolumn_str.c
+../../../../src/obidmscolumn_str.h
+../../../../src/obidmscolumn.h
+../../../../src/obidmscolumn.c
+../../../../src/obidmscolumndir.h
+../../../../src/obidmscolumndir.c
+../../../../src/obierrno.h
+../../../../src/obierrno.c
+../../../../src/obilittlebigman.h
+../../../../src/obilittlebigman.c
+../../../../src/obitypes.h
+../../../../src/obitypes.c
+../../../../src/obiview.h
+../../../../src/obiview.c
+../../../../src/sse_banded_LCS_alignment.h
+../../../../src/sse_banded_LCS_alignment.c
+../../../../src/uint8_indexer.h
+../../../../src/uint8_indexer.c
+../../../../src/upperband.h
+../../../../src/upperband.c
+../../../../src/utils.h
+../../../../src/utils.c
diff --git a/python/obitools3/obidms/capi/obidmscolumn.cfiles b/python/obitools3/obidms/capi/obidmscolumn.cfiles
new file mode 100644
index 0000000..3bbdbcb
--- /dev/null
+++ b/python/obitools3/obidms/capi/obidmscolumn.cfiles
@@ -0,0 +1,65 @@
+../../../../src/bloom.h
+../../../../src/bloom.c
+../../../../src/char_str_indexer.h
+../../../../src/char_str_indexer.c
+../../../../src/crc64.h
+../../../../src/crc64.c
+../../../../src/dna_seq_indexer.h
+../../../../src/dna_seq_indexer.c
+../../../../src/encode.h
+../../../../src/encode.c
+../../../../src/hashtable.h
+../../../../src/hashtable.c
+../../../../src/murmurhash2.h
+../../../../src/murmurhash2.c
+../../../../src/obi_align.h
+../../../../src/obi_align.c
+../../../../src/obiavl.h
+../../../../src/obiavl.c
+../../../../src/obiblob_indexer.h
+../../../../src/obiblob_indexer.c
+../../../../src/obiblob.h
+../../../../src/obiblob.c
+../../../../src/obidebug.h
+../../../../src/obidms_taxonomy.h
+../../../../src/obidms_taxonomy.c
+../../../../src/obidms.h
+../../../../src/obidms.c
+../../../../src/obidmscolumn_blob.c
+../../../../src/obidmscolumn_blob.h
+../../../../src/obidmscolumn_bool.c
+../../../../src/obidmscolumn_bool.h
+../../../../src/obidmscolumn_char.c
+../../../../src/obidmscolumn_char.h
+../../../../src/obidmscolumn_float.c
+../../../../src/obidmscolumn_float.h
+../../../../src/obidmscolumn_idx.h
+../../../../src/obidmscolumn_idx.c
+../../../../src/obidmscolumn_int.c
+../../../../src/obidmscolumn_int.h
+../../../../src/obidmscolumn_qual.h
+../../../../src/obidmscolumn_qual.c
+../../../../src/obidmscolumn_seq.c
+../../../../src/obidmscolumn_seq.h
+../../../../src/obidmscolumn_str.c
+../../../../src/obidmscolumn_str.h
+../../../../src/obidmscolumn.h
+../../../../src/obidmscolumn.c
+../../../../src/obidmscolumndir.h
+../../../../src/obidmscolumndir.c
+../../../../src/obierrno.h
+../../../../src/obierrno.c
+../../../../src/obilittlebigman.h
+../../../../src/obilittlebigman.c
+../../../../src/obitypes.h
+../../../../src/obitypes.c
+../../../../src/obiview.h
+../../../../src/obiview.c
+../../../../src/sse_banded_LCS_alignment.h
+../../../../src/sse_banded_LCS_alignment.c
+../../../../src/uint8_indexer.h
+../../../../src/uint8_indexer.c
+../../../../src/upperband.h
+../../../../src/upperband.c
+../../../../src/utils.h
+../../../../src/utils.c
diff --git a/python/obitools3/obidms/capi/obierrno.cfiles b/python/obitools3/obidms/capi/obierrno.cfiles
new file mode 100644
index 0000000..3bbdbcb
--- /dev/null
+++ b/python/obitools3/obidms/capi/obierrno.cfiles
@@ -0,0 +1,65 @@
+../../../../src/bloom.h
+../../../../src/bloom.c
+../../../../src/char_str_indexer.h
+../../../../src/char_str_indexer.c
+../../../../src/crc64.h
+../../../../src/crc64.c
+../../../../src/dna_seq_indexer.h
+../../../../src/dna_seq_indexer.c
+../../../../src/encode.h
+../../../../src/encode.c
+../../../../src/hashtable.h
+../../../../src/hashtable.c
+../../../../src/murmurhash2.h
+../../../../src/murmurhash2.c
+../../../../src/obi_align.h
+../../../../src/obi_align.c
+../../../../src/obiavl.h
+../../../../src/obiavl.c
+../../../../src/obiblob_indexer.h
+../../../../src/obiblob_indexer.c
+../../../../src/obiblob.h
+../../../../src/obiblob.c
+../../../../src/obidebug.h
+../../../../src/obidms_taxonomy.h
+../../../../src/obidms_taxonomy.c
+../../../../src/obidms.h
+../../../../src/obidms.c
+../../../../src/obidmscolumn_blob.c
+../../../../src/obidmscolumn_blob.h
+../../../../src/obidmscolumn_bool.c
+../../../../src/obidmscolumn_bool.h
+../../../../src/obidmscolumn_char.c
+../../../../src/obidmscolumn_char.h
+../../../../src/obidmscolumn_float.c
+../../../../src/obidmscolumn_float.h
+../../../../src/obidmscolumn_idx.h
+../../../../src/obidmscolumn_idx.c
+../../../../src/obidmscolumn_int.c
+../../../../src/obidmscolumn_int.h
+../../../../src/obidmscolumn_qual.h
+../../../../src/obidmscolumn_qual.c
+../../../../src/obidmscolumn_seq.c
+../../../../src/obidmscolumn_seq.h
+../../../../src/obidmscolumn_str.c
+../../../../src/obidmscolumn_str.h
+../../../../src/obidmscolumn.h
+../../../../src/obidmscolumn.c
+../../../../src/obidmscolumndir.h
+../../../../src/obidmscolumndir.c
+../../../../src/obierrno.h
+../../../../src/obierrno.c
+../../../../src/obilittlebigman.h
+../../../../src/obilittlebigman.c
+../../../../src/obitypes.h
+../../../../src/obitypes.c
+../../../../src/obiview.h
+../../../../src/obiview.c
+../../../../src/sse_banded_LCS_alignment.h
+../../../../src/sse_banded_LCS_alignment.c
+../../../../src/uint8_indexer.h
+../../../../src/uint8_indexer.c
+../../../../src/upperband.h
+../../../../src/upperband.c
+../../../../src/utils.h
+../../../../src/utils.c
diff --git a/python/obitools3/obidms/capi/obitaxonomy.cfiles b/python/obitools3/obidms/capi/obitaxonomy.cfiles
new file mode 100644
index 0000000..3bbdbcb
--- /dev/null
+++ b/python/obitools3/obidms/capi/obitaxonomy.cfiles
@@ -0,0 +1,65 @@
+../../../../src/bloom.h
+../../../../src/bloom.c
+../../../../src/char_str_indexer.h
+../../../../src/char_str_indexer.c
+../../../../src/crc64.h
+../../../../src/crc64.c
+../../../../src/dna_seq_indexer.h
+../../../../src/dna_seq_indexer.c
+../../../../src/encode.h
+../../../../src/encode.c
+../../../../src/hashtable.h
+../../../../src/hashtable.c
+../../../../src/murmurhash2.h
+../../../../src/murmurhash2.c
+../../../../src/obi_align.h
+../../../../src/obi_align.c
+../../../../src/obiavl.h
+../../../../src/obiavl.c
+../../../../src/obiblob_indexer.h
+../../../../src/obiblob_indexer.c
+../../../../src/obiblob.h
+../../../../src/obiblob.c
+../../../../src/obidebug.h
+../../../../src/obidms_taxonomy.h
+../../../../src/obidms_taxonomy.c
+../../../../src/obidms.h
+../../../../src/obidms.c
+../../../../src/obidmscolumn_blob.c
+../../../../src/obidmscolumn_blob.h
+../../../../src/obidmscolumn_bool.c
+../../../../src/obidmscolumn_bool.h
+../../../../src/obidmscolumn_char.c
+../../../../src/obidmscolumn_char.h
+../../../../src/obidmscolumn_float.c
+../../../../src/obidmscolumn_float.h
+../../../../src/obidmscolumn_idx.h
+../../../../src/obidmscolumn_idx.c
+../../../../src/obidmscolumn_int.c
+../../../../src/obidmscolumn_int.h
+../../../../src/obidmscolumn_qual.h
+../../../../src/obidmscolumn_qual.c
+../../../../src/obidmscolumn_seq.c
+../../../../src/obidmscolumn_seq.h
+../../../../src/obidmscolumn_str.c
+../../../../src/obidmscolumn_str.h
+../../../../src/obidmscolumn.h
+../../../../src/obidmscolumn.c
+../../../../src/obidmscolumndir.h
+../../../../src/obidmscolumndir.c
+../../../../src/obierrno.h
+../../../../src/obierrno.c
+../../../../src/obilittlebigman.h
+../../../../src/obilittlebigman.c
+../../../../src/obitypes.h
+../../../../src/obitypes.c
+../../../../src/obiview.h
+../../../../src/obiview.c
+../../../../src/sse_banded_LCS_alignment.h
+../../../../src/sse_banded_LCS_alignment.c
+../../../../src/uint8_indexer.h
+../../../../src/uint8_indexer.c
+../../../../src/upperband.h
+../../../../src/upperband.c
+../../../../src/utils.h
+../../../../src/utils.c
diff --git a/python/obitools3/obidms/capi/obitypes.cfiles b/python/obitools3/obidms/capi/obitypes.cfiles
new file mode 100644
index 0000000..3bbdbcb
--- /dev/null
+++ b/python/obitools3/obidms/capi/obitypes.cfiles
@@ -0,0 +1,65 @@
+../../../../src/bloom.h
+../../../../src/bloom.c
+../../../../src/char_str_indexer.h
+../../../../src/char_str_indexer.c
+../../../../src/crc64.h
+../../../../src/crc64.c
+../../../../src/dna_seq_indexer.h
+../../../../src/dna_seq_indexer.c
+../../../../src/encode.h
+../../../../src/encode.c
+../../../../src/hashtable.h
+../../../../src/hashtable.c
+../../../../src/murmurhash2.h
+../../../../src/murmurhash2.c
+../../../../src/obi_align.h
+../../../../src/obi_align.c
+../../../../src/obiavl.h
+../../../../src/obiavl.c
+../../../../src/obiblob_indexer.h
+../../../../src/obiblob_indexer.c
+../../../../src/obiblob.h
+../../../../src/obiblob.c
+../../../../src/obidebug.h
+../../../../src/obidms_taxonomy.h
+../../../../src/obidms_taxonomy.c
+../../../../src/obidms.h
+../../../../src/obidms.c
+../../../../src/obidmscolumn_blob.c
+../../../../src/obidmscolumn_blob.h
+../../../../src/obidmscolumn_bool.c
+../../../../src/obidmscolumn_bool.h
+../../../../src/obidmscolumn_char.c
+../../../../src/obidmscolumn_char.h
+../../../../src/obidmscolumn_float.c
+../../../../src/obidmscolumn_float.h
+../../../../src/obidmscolumn_idx.h
+../../../../src/obidmscolumn_idx.c
+../../../../src/obidmscolumn_int.c
+../../../../src/obidmscolumn_int.h
+../../../../src/obidmscolumn_qual.h
+../../../../src/obidmscolumn_qual.c
+../../../../src/obidmscolumn_seq.c
+../../../../src/obidmscolumn_seq.h
+../../../../src/obidmscolumn_str.c
+../../../../src/obidmscolumn_str.h
+../../../../src/obidmscolumn.h
+../../../../src/obidmscolumn.c
+../../../../src/obidmscolumndir.h
+../../../../src/obidmscolumndir.c
+../../../../src/obierrno.h
+../../../../src/obierrno.c
+../../../../src/obilittlebigman.h
+../../../../src/obilittlebigman.c
+../../../../src/obitypes.h
+../../../../src/obitypes.c
+../../../../src/obiview.h
+../../../../src/obiview.c
+../../../../src/sse_banded_LCS_alignment.h
+../../../../src/sse_banded_LCS_alignment.c
+../../../../src/uint8_indexer.h
+../../../../src/uint8_indexer.c
+../../../../src/upperband.h
+../../../../src/upperband.c
+../../../../src/utils.h
+../../../../src/utils.c
diff --git a/python/obitools3/obidms/capi/obiutils.cfiles b/python/obitools3/obidms/capi/obiutils.cfiles
new file mode 100644
index 0000000..3bbdbcb
--- /dev/null
+++ b/python/obitools3/obidms/capi/obiutils.cfiles
@@ -0,0 +1,65 @@
+../../../../src/bloom.h
+../../../../src/bloom.c
+../../../../src/char_str_indexer.h
+../../../../src/char_str_indexer.c
+../../../../src/crc64.h
+../../../../src/crc64.c
+../../../../src/dna_seq_indexer.h
+../../../../src/dna_seq_indexer.c
+../../../../src/encode.h
+../../../../src/encode.c
+../../../../src/hashtable.h
+../../../../src/hashtable.c
+../../../../src/murmurhash2.h
+../../../../src/murmurhash2.c
+../../../../src/obi_align.h
+../../../../src/obi_align.c
+../../../../src/obiavl.h
+../../../../src/obiavl.c
+../../../../src/obiblob_indexer.h
+../../../../src/obiblob_indexer.c
+../../../../src/obiblob.h
+../../../../src/obiblob.c
+../../../../src/obidebug.h
+../../../../src/obidms_taxonomy.h
+../../../../src/obidms_taxonomy.c
+../../../../src/obidms.h
+../../../../src/obidms.c
+../../../../src/obidmscolumn_blob.c
+../../../../src/obidmscolumn_blob.h
+../../../../src/obidmscolumn_bool.c
+../../../../src/obidmscolumn_bool.h
+../../../../src/obidmscolumn_char.c
+../../../../src/obidmscolumn_char.h
+../../../../src/obidmscolumn_float.c
+../../../../src/obidmscolumn_float.h
+../../../../src/obidmscolumn_idx.h
+../../../../src/obidmscolumn_idx.c
+../../../../src/obidmscolumn_int.c
+../../../../src/obidmscolumn_int.h
+../../../../src/obidmscolumn_qual.h
+../../../../src/obidmscolumn_qual.c
+../../../../src/obidmscolumn_seq.c
+../../../../src/obidmscolumn_seq.h
+../../../../src/obidmscolumn_str.c
+../../../../src/obidmscolumn_str.h
+../../../../src/obidmscolumn.h
+../../../../src/obidmscolumn.c
+../../../../src/obidmscolumndir.h
+../../../../src/obidmscolumndir.c
+../../../../src/obierrno.h
+../../../../src/obierrno.c
+../../../../src/obilittlebigman.h
+../../../../src/obilittlebigman.c
+../../../../src/obitypes.h
+../../../../src/obitypes.c
+../../../../src/obiview.h
+../../../../src/obiview.c
+../../../../src/sse_banded_LCS_alignment.h
+../../../../src/sse_banded_LCS_alignment.c
+../../../../src/uint8_indexer.h
+../../../../src/uint8_indexer.c
+../../../../src/upperband.h
+../../../../src/upperband.c
+../../../../src/utils.h
+../../../../src/utils.c
diff --git a/python/obitools3/obidms/capi/obiview.cfiles b/python/obitools3/obidms/capi/obiview.cfiles
new file mode 100644
index 0000000..3bbdbcb
--- /dev/null
+++ b/python/obitools3/obidms/capi/obiview.cfiles
@@ -0,0 +1,65 @@
+../../../../src/bloom.h
+../../../../src/bloom.c
+../../../../src/char_str_indexer.h
+../../../../src/char_str_indexer.c
+../../../../src/crc64.h
+../../../../src/crc64.c
+../../../../src/dna_seq_indexer.h
+../../../../src/dna_seq_indexer.c
+../../../../src/encode.h
+../../../../src/encode.c
+../../../../src/hashtable.h
+../../../../src/hashtable.c
+../../../../src/murmurhash2.h
+../../../../src/murmurhash2.c
+../../../../src/obi_align.h
+../../../../src/obi_align.c
+../../../../src/obiavl.h
+../../../../src/obiavl.c
+../../../../src/obiblob_indexer.h
+../../../../src/obiblob_indexer.c
+../../../../src/obiblob.h
+../../../../src/obiblob.c
+../../../../src/obidebug.h
+../../../../src/obidms_taxonomy.h
+../../../../src/obidms_taxonomy.c
+../../../../src/obidms.h
+../../../../src/obidms.c
+../../../../src/obidmscolumn_blob.c
+../../../../src/obidmscolumn_blob.h
+../../../../src/obidmscolumn_bool.c
+../../../../src/obidmscolumn_bool.h
+../../../../src/obidmscolumn_char.c
+../../../../src/obidmscolumn_char.h
+../../../../src/obidmscolumn_float.c
+../../../../src/obidmscolumn_float.h
+../../../../src/obidmscolumn_idx.h
+../../../../src/obidmscolumn_idx.c
+../../../../src/obidmscolumn_int.c
+../../../../src/obidmscolumn_int.h
+../../../../src/obidmscolumn_qual.h
+../../../../src/obidmscolumn_qual.c
+../../../../src/obidmscolumn_seq.c
+../../../../src/obidmscolumn_seq.h
+../../../../src/obidmscolumn_str.c
+../../../../src/obidmscolumn_str.h
+../../../../src/obidmscolumn.h
+../../../../src/obidmscolumn.c
+../../../../src/obidmscolumndir.h
+../../../../src/obidmscolumndir.c
+../../../../src/obierrno.h
+../../../../src/obierrno.c
+../../../../src/obilittlebigman.h
+../../../../src/obilittlebigman.c
+../../../../src/obitypes.h
+../../../../src/obitypes.c
+../../../../src/obiview.h
+../../../../src/obiview.c
+../../../../src/sse_banded_LCS_alignment.h
+../../../../src/sse_banded_LCS_alignment.c
+../../../../src/uint8_indexer.h
+../../../../src/uint8_indexer.c
+../../../../src/upperband.h
+../../../../src/upperband.c
+../../../../src/utils.h
+../../../../src/utils.c

From 490f5fe6b9990740e6ed2610d2a9c3c2a107345e Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Fri, 16 Dec 2016 19:04:21 +0100
Subject: [PATCH 03/22] Updated deprecated code in cython API for columns
 (using line count of view instead of column)

---
 python/obitools3/obidms/_obidms.pyx | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx
index c35cd14..7f86c59 100644
--- a/python/obitools3/obidms/_obidms.pyx
+++ b/python/obitools3/obidms/_obidms.pyx
@@ -100,17 +100,17 @@ cdef class OBIDMS_column :
     def __getitem__(self, index_t line_nb):
         return self.get_line(line_nb)
 
-    def __len__(self):
-        return self.lines_used
+    def __len__(self):        # TODO discuss
+        return self._view.line_count
     
     def __sizeof__(self):
         return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size)
     
-    def __iter__(self):
+    def __iter__(self):      # TODO discuss
         # Declarations
         cdef index_t line_nb  
         # Yield each line
-        for line_nb in range(self.lines_used):
+        for line_nb in range(self._view.line_count): 
             yield self.get_line(line_nb)
                 
     def __str__(self) :
@@ -160,11 +160,6 @@ cdef class OBIDMS_column :
     def version(self):
         return ((self._pointer)[0].header).version
 
-    # lines_used property getter
-    @property
-    def lines_used(self):
-        return (self._pointer)[0].header.lines_used
-
     # comments property getter
     @property
     def comments(self):

From 303bd6f4454458670d5c1c5b34291ddc24f925c4 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Fri, 16 Dec 2016 19:10:18 +0100
Subject: [PATCH 04/22] Added function to build kmer table for 2 columns, and
 fixed bug (with line count) when building kmer table of one column

---
 src/upperband.c | 42 +++++++++++++++++++++++++++++++++++++++---
 src/upperband.h |  4 ++++
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/upperband.c b/src/upperband.c
index 9eba088..548d09e 100644
--- a/src/upperband.c
+++ b/src/upperband.c
@@ -8,8 +8,6 @@
 #include "obidmscolumn.h"
 #include "obiview.h"
 
-//#include "../libutils/utilities.h"
-//#include "../libfasta/sequence.h"
 
 
 inline static uchar_v hash4m128(uchar_v frag)
@@ -242,7 +240,7 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t se
 
 	fprintf(stderr,"Building kmer tables...");
 
-	seq_count = (seq_col->header)->lines_used;
+	seq_count = (view->infos)->line_count;
 
 	// Allocate memory for the table structure
 	ktable = (Kmer_table_p) malloc(sizeof(Kmer_table_t) * seq_count);
@@ -267,6 +265,44 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t se
 }
 
 
+Kmer_table_p hash_two_seq_columns(Obiview_p view1, OBIDMS_column_p seq1_col, index_t seq1_idx,
+								  Obiview_p view2, OBIDMS_column_p seq2_col, index_t seq2_idx)
+{
+	// Builds one kmer table per column, then returns them concatenated (column 1 entries first).
+	// Returns NULL on failure; both intermediate tables are released on every error path.
+	size_t       seq1_count = (view1->infos)->line_count;
+	size_t       seq2_count = (view2->infos)->line_count;
+	Kmer_table_p ktable1;
+	Kmer_table_p ktable2;
+	Kmer_table_p ktable;
+
+	ktable1 = hash_seq_column(view1, seq1_col, seq1_idx);
+	if (ktable1 == NULL)
+		return NULL;
+
+	ktable2 = hash_seq_column(view2, seq2_col, seq2_idx);
+	if (ktable2 == NULL)
+	{	// The first table is fully built at this point: release it instead of leaking it
+		free_kmer_tables(ktable1, seq1_count);
+		return NULL;
+	}
+
+	// Grow the first table so it can hold both; on failure realloc leaves ktable1 valid, so free it too
+	ktable = realloc(ktable1, sizeof(Kmer_table_t) * (seq1_count + seq2_count));
+	if (ktable == NULL)
+	{
+		free_kmer_tables(ktable1, seq1_count);
+		free_kmer_tables(ktable2, seq2_count);
+		return NULL;
+	}
+
+	// Append the second table's entries (copied by value), then free only its outer array
+	memcpy(ktable+seq1_count, ktable2, sizeof(Kmer_table_t) * seq2_count);
+	free(ktable2);
+	return ktable;
+}
+
+
 void free_kmer_tables(Kmer_table_p ktable, size_t count)
 {
 	size_t      i;
diff --git a/src/upperband.h b/src/upperband.h
index f378287..087ac8b 100644
--- a/src/upperband.h
+++ b/src/upperband.h
@@ -18,7 +18,11 @@ typedef struct {
 } Kmer_table_t, *Kmer_table_p;
 
 
+// TODO doc
+
 Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t seq_idx);
+Kmer_table_p hash_two_seq_columns(Obiview_p view1, OBIDMS_column_p seq1_col, index_t seq1_idx,
+								  Obiview_p view2, OBIDMS_column_p seq2_col, index_t seq2_idx);
 void align_filters(Kmer_table_p ktable, Obi_blob_p seq1, Obi_blob_p seq2, index_t idx1, index_t idx2, double threshold, bool normalize, int reference, bool similarity_mode, double* score, int* LCSmin, bool can_be_identical);
 void free_kmer_tables(Kmer_table_p ktable, size_t count);
 

From d99447c12b2505ce96d874cf2e65a63bb1884777 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Fri, 16 Dec 2016 19:39:02 +0100
Subject: [PATCH 05/22] C function for LCS alignment of two columns, and
 optimized and fixed line count bug in function to align one column

---
 src/obi_align.c | 466 +++++++++++++++++++++++++++++++++++++++---------
 src/obi_align.h |  61 ++++++-
 2 files changed, 430 insertions(+), 97 deletions(-)

diff --git a/src/obi_align.c b/src/obi_align.c
index a23e27f..97f6d4c 100644
--- a/src/obi_align.c
+++ b/src/obi_align.c
@@ -1,12 +1,12 @@
 /****************************************************************************
- * Sequence alignment functions				                                *
+ * LCS sequence alignment functions				                            *
  ****************************************************************************/
 
 /**
  * @file obi_align.c
  * @author Celine Mercier
  * @date May 4th 2016
- * @brief Functions handling sequence alignments.
+ * @brief Functions handling LCS sequence alignments.
  */
 
 
@@ -407,7 +407,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 	Obiview_p       seq_view = NULL;
 	Obiview_p       output_view = NULL;
 	OBIDMS_column_p iseq_column = NULL;
-	OBIDMS_column_p id_column;
+	OBIDMS_column_p id_column = NULL;
 	OBIDMS_column_p id1_column = NULL;
 	OBIDMS_column_p id2_column = NULL;
 	OBIDMS_column_p seq1_column = NULL;
@@ -451,6 +451,14 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 		return -1;
 	}
 
+	// Check column type
+	if ((iseq_column->header)->returned_data_type != OBI_SEQ)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError: column given to align is not an OBI_SEQ column");
+		return -1;
+	}
+
 	// Get element index of the sequence to align in each line to compute it only once
 	if ((strcmp(seq_elt_name, "") != 0) && (seq_elt_name != NULL))
 	{
@@ -527,21 +535,30 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 		return -1;
 	}
 
-	seq_count = (iseq_column->header)->lines_used;
+	seq_count = (seq_view->infos)->line_count;
 
 	for (i=0; i < (seq_count - 1); i++)
 	{
 		if (i%100 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) seq_count)*100);
 
+		// Get first id idx
+		id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);	// TODO Could there be multiple IDs per line?
+		// Get first sequence and its index
+		seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
+		blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
+		if (blob1 == NULL)
+		{
+			obidebug(1, "\nError retrieving sequences to align");
+			return -1;
+		}
+
 		for (j=i+1; j < seq_count; j++)
 		{
-			blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
-			blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
-			seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
+			// Get second sequence and its index
 			seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
-
-			if ((blob1 == NULL) || (blob2 == NULL))
+			blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
+			if (blob2 == NULL)
 			{
 				obidebug(1, "\nError retrieving sequences to align");
 				return -1;
@@ -569,10 +586,9 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 			}
 
 			if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
-			{	// Print result		// TODO make separate function maybe
+			{	// Print result
 
-				// Get ids idx
-				id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);	// TODO Could there be multiple IDs per line?
+				// Get second id idx
 				id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
 
 				if (print_alignment_result(output_view, k,
@@ -609,82 +625,354 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 }
 
 
-// TODO discuss if 2 input views or 2 columns or both possible
-//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,	// TODO it's implied both seq columns are in the same view but maybe it shouldn't
-//						  Obiview_p score_view, OBIDMS_column_p score_column,
-//						  double threshold, bool normalize, int reference, bool similarity_mode)
-//{
-//	index_t i, j, k;
-//	index_t seq_count_1;
-//	index_t seq_count_2;
-//	char* seq1;
-//	char* seq2;
-//	double score;
-//
-//	k = 0;
-//
-//	if (((seq_column_1->header)->returned_data_type != OBI_SEQ) || ((seq_column_2->header)->returned_data_type != OBI_SEQ))
-//	{
-//		obi_set_errno(OBI_ALIGN_ERROR);
-//		obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
-//		return -1;
-//	}
-//
-//	if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) ||
-//			(!normalize && ((score_column->header)->returned_data_type != OBI_INT)))
-//	{
-//		obi_set_errno(OBI_ALIGN_ERROR);
-//		obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
-//		return -1;
-//	}
-//
-//	seq_count_1 = (seq_column_1->header)->lines_used;
-//	seq_count_2 = (seq_column_2->header)->lines_used;
-//
-//	for (i=0; i < (seq_count_1 - 1); i++)
-//	{
-//		for (j=0; j < seq_count_2; j++)
-//		{
-//			//fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
-//
-//			seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_1, i, 0);
-//			seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_2, j, 0);
-//
-//			if ((seq1 == NULL) || (seq2 == NULL))
-//			{
-//				obidebug(1, "\nError retrieving sequences to align");
-//				return -1;
-//			}
-//
-//			// TODO kmer filter
-//
-//			score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
-//
-//			if (normalize)
-//			{
-//				if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
-//				{
-//					obidebug(1, "\nError writing alignment score in a column");
-//					return -1;
-//				}
-//			}
-//			else
-//			{
-//				if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
-//				{
-//					obidebug(1, "\nError writing alignment score in a column");
-//					return -1;
-//				}
-//			}
-//
-//			free(seq1);
-//			free(seq2);
-//
-//			k++;
-//		}
-//	}
-//
-//	return 0;
-//}
+int obi_lcs_align_two_columns(OBIDMS_p dms,
+							  const char* seq1_view_name,
+							  const char* seq2_view_name,
+							  const char* seq1_column_name,
+							  const char* seq2_column_name,
+							  const char* seq1_elt_name,
+							  const char* seq2_elt_name,
+							  const char* id1_column_name,
+							  const char* id2_column_name,
+					          const char* output_view_name, const char* output_view_comments,
+							  bool print_seq, bool print_count,
+						      double threshold, bool normalize, int reference, bool similarity_mode)
+{
+	index_t         i, j, k;
+	index_t         seq1_count;
+	index_t         seq2_count;
+	index_t         id1_idx, id2_idx;
+	index_t         seq1_idx, seq2_idx;
+	double          score;
+	int             lcs_length;
+	int             ali_length;
+	Kmer_table_p    ktable;
+	Obi_blob_p      blob1;
+	Obi_blob_p   	blob2;
+	int				lcs_min;
+	index_t         seq1_elt_idx;
+	index_t         seq2_elt_idx;
+	bool 			same_indexer;
+
+	Obiview_p       seq1_view = NULL;
+	Obiview_p       seq2_view = NULL;
+	Obiview_p       output_view = NULL;
+	OBIDMS_column_p i_seq1_column = NULL;
+	OBIDMS_column_p i_seq2_column = NULL;
+	OBIDMS_column_p i_id1_column = NULL;
+	OBIDMS_column_p i_id2_column = NULL;
+	OBIDMS_column_p id1_column = NULL;
+	OBIDMS_column_p id2_column = NULL;
+	OBIDMS_column_p seq1_column = NULL;
+	OBIDMS_column_p seq2_column = NULL;
+	//OBIDMS_column_p count1_column = NULL;
+	//OBIDMS_column_p count2_column = NULL;
+	OBIDMS_column_p idx1_column = NULL;
+	OBIDMS_column_p idx2_column = NULL;
+	OBIDMS_column_p lcs_length_column = NULL;
+	OBIDMS_column_p ali_length_column = NULL;
+	OBIDMS_column_p score_column = NULL;
+
+	k = 0;
+
+	// Open the first input view
+	seq1_view = obi_open_view(dms, seq1_view_name);
+	if (seq1_view == NULL)
+	{
+		obidebug(1, "\nError opening the first input view to align");
+		return -1;
+	}
+
+	// Open the second input view. Same as 1st if ""
+	if (strcmp(seq2_view_name, "") == 0)
+		seq2_view = seq1_view;
+	else
+	{
+		seq2_view = obi_open_view(dms, seq2_view_name);
+		if (seq2_view == NULL)
+		{
+			obidebug(1, "\nError opening the second input view to align");
+			return -1;
+		}
+	}
+
+	// Open the first sequence column to align
+	// If a column name wasn't given, open default sequence column
+	if (strcmp(seq1_column_name, "") == 0)
+	{
+		if (strcmp((seq1_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
+			i_seq1_column = obi_view_get_column(seq1_view, NUC_SEQUENCE_COLUMN);
+		else
+		{
+			obi_set_errno(OBI_ALIGN_ERROR);
+			obidebug(1, "\nError: no first column given to align");
+			return -1;
+		}
+	}
+	else
+		i_seq1_column = obi_view_get_column(seq1_view, seq1_column_name);
+	if (i_seq1_column == NULL)
+	{
+		obidebug(1, "\nError getting the first column to align");
+		return -1;
+	}
+
+	// Check column type
+	if ((i_seq1_column->header)->returned_data_type != OBI_SEQ)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError: first column given to align is not an OBI_SEQ column");
+		return -1;
+	}
+
+	// Open the second sequence column to align
+	// If a column name wasn't given, open default sequence column
+	if (strcmp(seq2_column_name, "") == 0)
+	{
+		if (strcmp((seq2_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
+			i_seq2_column = obi_view_get_column(seq2_view, NUC_SEQUENCE_COLUMN);
+		else
+		{
+			obi_set_errno(OBI_ALIGN_ERROR);
+			obidebug(1, "\nError: no second column given to align");
+			return -1;
+		}
+	}
+	else
+		i_seq2_column = obi_view_get_column(seq2_view, seq2_column_name);
+	if (i_seq2_column == NULL)
+	{
+		obidebug(1, "\nError getting the second column to align");
+		return -1;
+	}
+	// Check that the sequence columns are not both the default NUC_SEQ column of the same view
+	if (i_seq1_column == i_seq2_column)
+	{
+		obidebug(1, "\nError: trying to align a column with itself (default NUC_SEQ column of the same view)");
+		return -1;
+	}
+
+	// Check column type
+	if ((i_seq2_column->header)->returned_data_type != OBI_SEQ)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError: second column given to align is not an OBI_SEQ column");
+		return -1;
+	}
+
+	// Get element index of the sequence to align in each line of the first column to compute it only once
+	if ((strcmp(seq1_elt_name, "") != 0) && (seq1_elt_name != NULL))
+	{
+		seq1_elt_idx = obi_column_get_element_index_from_name(i_seq1_column, seq1_elt_name);
+		if (seq1_elt_idx == OBIIdx_NA)
+		{
+			obidebug(1, "\nError getting the sequence index in a column line when aligning");
+			return -1;
+		}
+	}
+	else
+		seq1_elt_idx = 0;
+
+	// Get element index of the sequence to align in each line of the second column to compute it only once
+	if ((strcmp(seq2_elt_name, "") != 0) && (seq2_elt_name != NULL))
+	{
+		seq2_elt_idx = obi_column_get_element_index_from_name(i_seq2_column, seq2_elt_name);
+		if (seq2_elt_idx == OBIIdx_NA)
+		{
+			obidebug(1, "\nError getting the sequence index in a column line when aligning");
+			return -1;
+		}
+	}
+	else
+		seq2_elt_idx = 0;
 
 
+	// Open the first ID column, containing the identifiers of the first sequence to align
+	// If a column name wasn't given, open default ID column
+	if (strcmp(id1_column_name, "") == 0)
+	{
+		if (strcmp((seq1_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
+			i_id1_column = obi_view_get_column(seq1_view, ID_COLUMN);
+		else
+		{
+			obi_set_errno(OBI_ALIGN_ERROR);
+			obidebug(1, "\nError: no first ID column given");
+			return -1;
+		}
+	}
+	else
+		i_id1_column = obi_view_get_column(seq1_view, id1_column_name);
+	if (i_id1_column == NULL)
+	{
+		obidebug(1, "\nError getting the first ID column");
+		return -1;
+	}
+
+	// Open the second ID column, containing the identifiers of the second sequence to align
+	// If a column name wasn't given, open default ID column
+	if (strcmp(id2_column_name, "") == 0)
+	{
+		if (strcmp((seq2_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
+			i_id2_column = obi_view_get_column(seq2_view, ID_COLUMN);
+		else
+		{
+			obi_set_errno(OBI_ALIGN_ERROR);
+			obidebug(1, "\nError: no second ID column given");
+			return -1;
+		}
+	}
+	else
+		i_id2_column = obi_view_get_column(seq2_view, id2_column_name);
+	if (i_id2_column == NULL)
+	{
+		obidebug(1, "\nError getting the second ID column");
+		return -1;
+	}
+
+	// Create the output view
+	output_view = obi_new_view(dms, output_view_name, NULL, NULL, output_view_comments);
+	if (output_view == NULL)
+	{
+		obidebug(1, "\nError creating the output view when aligning");
+		return -1;
+	}
+
+	// Create the output columns
+	if (create_alignment_output_columns(output_view,
+			(i_id1_column->header)->indexer_name, (i_id2_column->header)->indexer_name,
+			(i_seq1_column->header)->indexer_name, (i_seq2_column->header)->indexer_name,
+			print_seq, print_count, normalize, reference, similarity_mode) < 0)
+		return -1;
+	id1_column = obi_view_get_column(output_view, ID1_COLUMN_NAME);
+	id2_column = obi_view_get_column(output_view, ID2_COLUMN_NAME);
+	idx1_column = obi_view_get_column(output_view, IDX1_COLUMN_NAME);
+	idx2_column = obi_view_get_column(output_view, IDX2_COLUMN_NAME);
+    lcs_length_column = obi_view_get_column(output_view, LCS_LENGTH_COLUMN_NAME);
+	if ((reference == ALILEN) && (normalize || !similarity_mode))
+		ali_length_column = obi_view_get_column(output_view, ALI_LENGTH_COLUMN_NAME);
+	score_column = obi_view_get_column(output_view, SCORE_COLUMN_NAME);
+	if (print_seq)
+	{
+		seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME);
+		seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME);
+	}
+//	if (print_count)  // TODO count columns not implemented yet
+//	{
+//		count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME);
+//		count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME);
+//	}
+
+	// Check if the sequence columns share the same indexer (allows for quick checking of sequence equality)
+	if (strcmp((i_seq1_column->header)->indexer_name, (i_seq2_column->header)->indexer_name) == 0)
+		same_indexer = true;
+	else
+		same_indexer = false;
+
+	// Build kmer tables
+	ktable = hash_two_seq_columns(seq1_view, i_seq1_column, seq1_elt_idx, seq2_view, i_seq2_column, seq2_elt_idx);
+	if (ktable == NULL)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError building kmer tables before aligning");
+		return -1;
+	}
+
+	seq1_count = (seq1_view->infos)->line_count;
+	seq2_count = (seq2_view->infos)->line_count;
+
+	for (i=0; i < seq1_count; i++)
+	{
+		if (i%100 == 0)
+			fprintf(stderr,"\rDone : %f %%       ", (i / (float) seq1_count)*100);
+
+		// Get id index of first sequence
+		id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq1_view, i_id1_column, i, 0); // TODO Could there be multiple IDs per line?
+		// Get first sequence and its index
+		seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq1_view, i_seq1_column, i, seq1_elt_idx);
+		blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq1_view, i_seq1_column, i, seq1_elt_idx);
+		if (blob1 == NULL)
+		{
+			obidebug(1, "\nError retrieving sequences to align");
+			return -1;
+		}
+
+		for (j=0; j < seq2_count; j++)
+		{
+			// Get second sequence and its index
+			seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq2_view, i_seq2_column, j, seq2_elt_idx);
+			blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq2_view, i_seq2_column, j, seq2_elt_idx);
+			if (blob2 == NULL)
+			{
+				obidebug(1, "\nError retrieving sequences to align");
+				return -1;
+			}
+
+			// Check if the sequences are identical in a quick way (same index in the same indexer)
+			if (same_indexer && (seq1_idx == seq2_idx))
+			{
+				if (similarity_mode && normalize)
+					score = 1.0;
+				else if (!similarity_mode)
+					score = 0.0;
+				else
+					score = blob1->length_decoded_value;
+			}
+
+			else // the sequences aren't identical
+			{
+				// kmer filter (offset for the index of the kmer table of the 2nd sequence because the kmer tables of the 2 sequence columns are concatenated in one)
+				align_filters(ktable, blob1, blob2, i, seq1_count+j, threshold, normalize, reference, similarity_mode, &score, &lcs_min, false);
+
+				// Compute alignment score
+				if ((threshold == 0) || (score == -1.0))	// no threshold, or filter passed: align
+					score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
+			}
+
+			if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
+			{	// Print result
+
+				// Get second id idx
+				id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq2_view, i_id2_column, j, 0);
+
+				if (print_alignment_result(output_view, k,
+										   idx1_column, idx2_column, i, j,
+										   id1_column, id2_column, id1_idx, id2_idx,
+						                   print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx,
+										   //print_count, count1_column, count2_column, count1, count2,
+										   ali_length_column, ali_length,
+										   lcs_length_column, lcs_length,
+										   score_column, score,
+										   reference, normalize, similarity_mode) < 0)
+					return -1;
+
+				k++;
+			}
+		}
+	}
+
+	// Close views
+	if (seq2_view != seq1_view)
+	{
+		if (obi_close_view(seq2_view) < 0)
+		{
+			obidebug(1, "\nError closing the second input view after aligning");
+			return -1;
+		}
+	}
+	if (obi_close_view(seq1_view) < 0)
+	{
+		obidebug(1, "\nError closing the first input view after aligning");
+		return -1;
+	}
+
+	if (obi_close_view(output_view) < 0)
+	{
+		obidebug(1, "\nError closing the output view after aligning");
+		return -1;
+	}
+
+	free_kmer_tables(ktable, seq1_count + seq2_count);
+
+	return 0;
+}
+
diff --git a/src/obi_align.h b/src/obi_align.h
index c0d823c..68048bd 100644
--- a/src/obi_align.h
+++ b/src/obi_align.h
@@ -1,12 +1,12 @@
 /****************************************************************************
- * Sequence alignment functions header file	                                *
+ * LCS sequence alignment functions header file	                            *
  ****************************************************************************/
 
 /**
  * @file obi_align.h
  * @author Celine Mercier
  * @date May 11th 2016
- * @brief Header file for the functions handling the alignment of DNA sequences.
+ * @brief Header file for the functions handling the LCS alignment of DNA sequences.
  */
 
 
@@ -55,7 +55,7 @@
 
 
 /**
- * @brief Aligns a NUC_SEQ column with itself.
+ * @brief Aligns an OBI_SEQ column with itself.
  *
  * Note: The columns where the results are written are automatically named and created.
  *
@@ -96,14 +96,59 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
 
 
 /**
- * @brief
+ * @brief Aligns two OBI_SEQ columns.
  *
- * TODO
+ * The columns must belong to the same OBIDMS, but can belong to different views.
  *
+ * Note: The columns where the results are written are automatically named and created.
+ *
+ * @param dms A pointer on an OBIDMS.
+ * @param seq1_view_name The name of the view where the first column to align is.
+ * @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one).
+ * @param seq1_column_name The name of the first OBI_SEQ column in the input view to align.
+ *                         If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
+ * @param seq2_column_name The name of the second OBI_SEQ column in the input view to align.
+ *                         If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
+ * @param seq1_elt_name The name of the element in the first column corresponding to the sequence to align, if the column has multiple
+ *                      elements per line.
+ * @param seq2_elt_name The name of the element in the second column corresponding to the sequence to align, if the column has multiple
+ *                      elements per line.
+ * @param id1_column_name The name of the column in the first input view containing the identifiers of the first sequence to align.
+ *                        If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
+ * @param id2_column_name The name of the column in the second input view containing the identifiers of the second sequence to align.
+ *                        If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
+ * @param output_view_name The name of the output view where the results should be written (should not already exist).
+ * @param output_view_comments The comments that should be associated with the output view.
+ * @param print_seq A boolean indicating whether the aligned sequences should be copied in the output view.
+ * @param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view.
+ * @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
+ * 					for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
+ * 					e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
+ * 					the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
+ *                  it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
+ * @param normalize Whether the score should be normalized with the reference sequence length.
+ * @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
+ * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
+ *
+ * @returns A value indicating the success of the operation.
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ *
+ * @since December 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,
-//						  Obiview_p score_view, OBIDMS_column_p score_column,
-//						  double threshold, bool normalize, int reference, bool similarity_mode);
+int obi_lcs_align_two_columns(OBIDMS_p dms,
+							  const char* seq1_view_name,
+							  const char* seq2_view_name,
+							  const char* seq1_column_name,
+							  const char* seq2_column_name,
+							  const char* seq1_elt_name,
+							  const char* seq2_elt_name,
+							  const char* id1_column_name,
+							  const char* id2_column_name,
+					          const char* output_view_name, const char* output_view_comments,
+							  bool print_seq, bool print_count,
+						      double threshold, bool normalize, int reference, bool similarity_mode);
 
 
 #endif /* OBI_ALIGN_H_ */

From 857a5198e4dc33c46c1abf33b819cc10dd1c01a2 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Fri, 16 Dec 2016 19:40:36 +0100
Subject: [PATCH 06/22] Updated ``obi lcs`` for the LCS alignment of two
 columns

---
 python/obitools3/commands/lcs.pyx         | 45 ++++++++++++++++-------
 python/obitools3/obidms/capi/obialign.pxd | 18 +++++++++
 2 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/python/obitools3/commands/lcs.pyx b/python/obitools3/commands/lcs.pyx
index 0a8b3b0..8a8bea9 100644
--- a/python/obitools3/commands/lcs.pyx
+++ b/python/obitools3/commands/lcs.pyx
@@ -4,7 +4,8 @@ from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
 from obitools3.obidms._obidms cimport OBIDMS    # TODO cimport doesn't work
 from obitools3.utils cimport str2bytes
 
-from obitools3.obidms.capi.obialign cimport obi_lcs_align_one_column
+from obitools3.obidms.capi.obialign cimport obi_lcs_align_one_column, \
+                                            obi_lcs_align_two_columns
 
 
 import time
@@ -161,19 +162,35 @@ cpdef align(str dms_n,
     cdef OBIDMS d         
     d = OBIDMS(dms_n)
 
-    # Align 1 column (2 columns not implemented yet)
-    if obi_lcs_align_one_column(d._pointer, \
-                                str2bytes(input_view_1_n), \
-                                str2bytes(input_column_1_n), \
-                                str2bytes(input_elt_1_n), \
-                                str2bytes(id_column_1_n), \
-                                str2bytes(output_view_n), \
-                                str2bytes(comments), \
-                                print_seq, \
-                                print_count, \
-                                threshold, normalize, reference, similarity_mode) < 0 :
-        raise Exception("Error aligning sequences")
-
+    if input_view_2_n == "" and input_column_2_n == "" :
+        if obi_lcs_align_one_column(d._pointer, \
+                                    str2bytes(input_view_1_n), \
+                                    str2bytes(input_column_1_n), \
+                                    str2bytes(input_elt_1_n), \
+                                    str2bytes(id_column_1_n), \
+                                    str2bytes(output_view_n), \
+                                    str2bytes(comments), \
+                                    print_seq, \
+                                    print_count, \
+                                    threshold, normalize, reference, similarity_mode) < 0 :
+            raise Exception("Error aligning sequences")
+    else :
+        if obi_lcs_align_two_columns(d._pointer, \
+                                     str2bytes(input_view_1_n), \
+                                     str2bytes(input_view_2_n), \
+                                     str2bytes(input_column_1_n), \
+                                     str2bytes(input_column_2_n), \
+                                     str2bytes(input_elt_1_n), \
+                                     str2bytes(input_elt_2_n), \
+                                     str2bytes(id_column_1_n), \
+                                     str2bytes(id_column_2_n), \
+                                     str2bytes(output_view_n), \
+                                     str2bytes(comments), \
+                                     print_seq, \
+                                     print_count, \
+                                     threshold, normalize, reference, similarity_mode) < 0 :
+            raise Exception("Error aligning sequences")        
+        
     d.close()
 
 
diff --git a/python/obitools3/obidms/capi/obialign.pxd b/python/obitools3/obidms/capi/obialign.pxd
index e9c105c..e76cabe 100644
--- a/python/obitools3/obidms/capi/obialign.pxd
+++ b/python/obitools3/obidms/capi/obialign.pxd
@@ -20,3 +20,21 @@ cdef extern from "obi_align.h" nogil:
                                  int reference, 
                                  bint similarity_mode)
 
+
+    int obi_lcs_align_two_columns(OBIDMS_p dms,
+                                  const_char_p seq1_view_name,
+                                  const_char_p seq2_view_name,
+                                  const_char_p seq1_column_name,
+                                  const_char_p seq2_column_name,
+                                  const_char_p seq1_elt_name,
+                                  const_char_p seq2_elt_name,
+                                  const_char_p id1_column_name,
+                                  const_char_p id2_column_name,
+                                  const_char_p output_view_name, 
+                                  const_char_p output_view_comments,
+                                  bint print_seq, 
+                                  bint print_count,
+                                  double threshold, 
+                                  bint normalize, 
+                                  int reference, 
+                                  bint similarity_mode);

From 9c71b06117784f4d7a2b2f61e1045d40e72ff3d8 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Mon, 19 Dec 2016 14:36:40 +0100
Subject: [PATCH 07/22] Removed deprecated TODOs

---
 python/obitools3/obidms/_obidmscolumn_str.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/obitools3/obidms/_obidmscolumn_str.pyx b/python/obitools3/obidms/_obidmscolumn_str.pyx
index 1401141..87a7936 100644
--- a/python/obitools3/obidms/_obidmscolumn_str.pyx
+++ b/python/obitools3/obidms/_obidmscolumn_str.pyx
@@ -22,7 +22,7 @@ cdef class OBIDMS_column_str(OBIDMS_column):
             result = None
         else :
             result = bytes2str(value)
-        # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
+        # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
         return result
 
     cpdef set_line(self, index_t line_nb, object value):
@@ -46,7 +46,7 @@ cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
             result = None
         else :
             result = bytes2str(value)
-        # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
+        # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
         return result
 
     cpdef object get_line(self, index_t line_nb) :
@@ -65,7 +65,7 @@ cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
                 value_in_result = None
             else :
                 value_in_result = bytes2str(value)
-            # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
+            # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
             result[self.elements_names[i]] = value_in_result
             if all_NA and (value_in_result is not None) :
                 all_NA = False

From 5c50e5b378a3240ce8ea07ef1fa2d4fce80eea79 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Tue, 20 Dec 2016 11:46:58 +0100
Subject: [PATCH 08/22] Embryo of code for openMP parallelization of LCS
 alignment but deactivated for now because can't make it compile with
 cython/clang

---
 python/obitools3/commands/lcs.pyx | 18 ++++++++++++++----
 src/obi_align.c                   | 12 +++++++++++-
 src/obi_align.h                   |  3 ++-
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/python/obitools3/commands/lcs.pyx b/python/obitools3/commands/lcs.pyx
index 8a8bea9..1591fd8 100644
--- a/python/obitools3/commands/lcs.pyx
+++ b/python/obitools3/commands/lcs.pyx
@@ -147,6 +147,13 @@ def addOptions(parser):
                        default=False,
                        help="Sequence counts are written in the output view. Default: they are not written.")
 
+    group.add_argument('--thread-count','-p',   # TODO should probably be in a specific option group
+                       action="store", dest="align:threadcount",
+                       metavar='<THREAD COUNT>',
+                       default=1,
+                       type=int,
+                       help="Number of threads to use for the computation. Default: one.")
+
 
 cpdef align(str dms_n, 
             str input_view_1_n, str output_view_n,
@@ -157,7 +164,8 @@ cpdef align(str dms_n,
             double threshold=0.0, bint normalize=True, 
             int reference=0, bint similarity_mode=True,
             bint print_seq=False, bint print_count=False,
-            comments="") :
+            comments="",
+            int thread_count=1) :
                  
     cdef OBIDMS d         
     d = OBIDMS(dms_n)
@@ -172,7 +180,8 @@ cpdef align(str dms_n,
                                     str2bytes(comments), \
                                     print_seq, \
                                     print_count, \
-                                    threshold, normalize, reference, similarity_mode) < 0 :
+                                    threshold, normalize, reference, similarity_mode,
+                                    thread_count) < 0 :
             raise Exception("Error aligning sequences")
     else :
         if obi_lcs_align_two_columns(d._pointer, \
@@ -216,8 +225,9 @@ def run(config):
           similarity_mode  = config['align']['similarity'],  \
           print_seq        = config['align']['printseq'],  \
           print_count      = config['align']['printcount'], \
-          comments         = comments)
-      
+          comments         = comments, \
+          thread_count     = config['align']['threadcount'])
+
     print("Done.")
 
     
diff --git a/src/obi_align.c b/src/obi_align.c
index 97f6d4c..3fa3678 100644
--- a/src/obi_align.c
+++ b/src/obi_align.c
@@ -9,6 +9,10 @@
  * @brief Functions handling LCS sequence alignments.
  */
 
+//#define OMP_SUPPORT // TODO
+#ifdef OMP_SUPPORT
+#include <omp.h>
+#endif
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -389,7 +393,8 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 							 const char* id_column_name,
 					         const char* output_view_name, const char* output_view_comments,
 							 bool print_seq, bool print_count,
-						     double threshold, bool normalize, int reference, bool similarity_mode)
+						     double threshold, bool normalize, int reference, bool similarity_mode,
+							 int thread_count)
 {
 	index_t         i, j, k;
 	index_t         seq_count;
@@ -537,6 +542,11 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 
 	seq_count = (seq_view->infos)->line_count;
 
+	#ifdef OMP_SUPPORT
+	omp_set_num_threads(thread_count);
+	#pragma omp parallel for
+	#endif
+
 	for (i=0; i < (seq_count - 1); i++)
 	{
 		if (i%100 == 0)
diff --git a/src/obi_align.h b/src/obi_align.h
index 68048bd..98da4da 100644
--- a/src/obi_align.h
+++ b/src/obi_align.h
@@ -92,7 +92,8 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
 							 const char* id_column_name,
 					         const char* output_view_name, const char* output_view_comments,
 							 bool print_seq, bool print_count,
-						     double threshold, bool normalize, int reference, bool similarity_mode);
+						     double threshold, bool normalize, int reference, bool similarity_mode,
+							 int thread_count);
 
 
 /**

From 30e4359c8556296331e7f5ee8d70e3ab94b56f2d Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Thu, 22 Dec 2016 17:03:51 +0100
Subject: [PATCH 09/22] LCS alignment: documentation for all the lowest level
 functions

---
 src/sse_banded_LCS_alignment.c | 279 ++++++++++++++++++++++++++++++---
 src/sse_banded_LCS_alignment.h | 105 ++++++++++++-
 2 files changed, 352 insertions(+), 32 deletions(-)

diff --git a/src/sse_banded_LCS_alignment.c b/src/sse_banded_LCS_alignment.c
index 07b5ffe..8790c6b 100644
--- a/src/sse_banded_LCS_alignment.c
+++ b/src/sse_banded_LCS_alignment.c
@@ -1,16 +1,22 @@
-/*
- * sse_banded_LCS_alignment.c
- *
- *  Created on: 7 nov. 2012
- *      Author: celine mercier
+/****************************************************************************
+ * LCS alignment of two sequences				                            *
+ ****************************************************************************/
+
+/**
+ * @file sse_banded_LCS_alignment.c
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ * @date November 7th 2012
+ * @brief Functions handling the alignment of two sequences to compute their Longest Common Subsequence.
  */
 
 
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <stdint.h>
 #include <stdbool.h>
+#include <limits.h>
 
 #include "obierrno.h"
 #include "obidebug.h"
@@ -24,6 +30,231 @@
 #define DEBUG_LEVEL 0	// TODO has to be defined somewhere else (cython compil flag?)
 
 
+
+/**************************************************************************
+ *
+ * D E C L A R A T I O N   O F   T H E   P R I V A T E   F U N C T I O N S
+ *
+ **************************************************************************/
+
+
+/**
+ * @brief Internal function printing a 128 bits register as 8 16-bits integers.
+ *
+ * @param r The register to print.
+ *
+ * @author Eric Coissac (eric.coissac@metabarcoding.org)
+ */
+static void printreg(__m128i r);
+
+
+/**
+ * @brief Internal function extracting a 16-bits integer from a 128 bits register.
+ *
+ * @param r The register to read.
+ * @param p The position at which the integer should be read (between 0 and 7).
+ *
+ * @returns The extracted integer.
+ *
+ * @author Eric Coissac (eric.coissac@metabarcoding.org)
+ */
+static inline int extract_reg(__m128i r, int p);
+
+
+/**
+ * @brief Internal function aligning two sequences, computing the lengths of their Longest Common Subsequence and of their alignment.
+ *
+ * @warning The first argument (seq1) must correspond to the longest sequence.
+ *
+ * @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
+ * @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
+ * @param l1 The length of the first sequence.
+ * @param l2 The length of the second sequence.
+ * @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
+ * @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
+ * @param address A pointer, aligned on a 16 bits boundary, on the int array where the initial values for the alignment length are stored,
+ *                as prepared for the alignment by initializeAddressWithGaps().
+ * @param lcs_length A pointer on the int where the LCS length will be stored.
+ * @param ali_length A pointer on the int where the alignment length will be stored.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length);
+
+
+/**
+ * @brief Internal function aligning two sequences, computing the length of their Longest Common Subsequence (and not the alignment length).
+ *
+ * @warning The first argument (seq1) must correspond to the longest sequence.
+ *
+ * @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
+ * @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
+ * @param l1 The length of the first sequence.
+ * @param l2 The length of the second sequence.
+ * @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
+ * @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
+ * @param lcs_length A pointer on the int where the LCS length will be stored.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length);
+
+
+/**
+ * @brief Internal function calculating the length of the left band for the banded alignment.
+ *
+ * @param lmax The length of the longest sequence to align.
+ * @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
+ *
+ * @returns The length of the left band.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int calculateLeftBandLength(int lmax, int LCSmin);
+
+
+/**
+ * @brief Internal function calculating the length of the right band for the banded alignment.
+ *
+ * @param lmin The length of the shortest sequence to align.
+ * @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
+ *
+ * @returns The length of the right band.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int calculateRightBandLength(int lmin, int LCSmin);
+
+
+/**
+ * @brief Internal function calculating the length of the complete band for the banded alignment.
+ *
+ * @param bandLengthRight The length of the right band for the banded alignment, as computed by calculateRightBandLength().
+ * @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
+ *
+ * @returns The length of the complete band.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft);
+
+
+/**
+ * @brief Internal function calculating the size to allocate for the int array where the alignment length will be stored in the matrix.
+ *
+ * @param maxLen The length of the longest sequence to align.
+ * @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
+ *
+ * @returns The size to allocate in bytes.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int calculateSizeToAllocate(int maxLen, int LCSmin);
+
+
+/**
+ * @brief Internal function initializing the int array corresponding to a sequence to align with default values.
+ *
+ * @param seq The int array corresponding to the sequence to align, as prepared by putSeqInSeq() or putBlobInSeq().
+ * @param size The number of positions to initialize.
+ * @param iniValue The value that the positions should be initialized to.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+void iniSeq(int16_t* seq, int size, int16_t iniValue);
+
+
+/**
+ * @brief Internal function building the int array corresponding to a sequence to align.
+ *
+ * Each nucleotide is stored as a short int (int16_t).
+ *
+ * @param seq A pointer on the allocated int array.
+ * @param s A pointer on the character string corresponding to the sequence.
+ * @param l The length of the sequence.
+ * @param reverse A boolean indicating whether the sequence should be written reversed
+ *                (for the second sequence to align).
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+void putSeqInSeq(int16_t* seq, char* s, int l, bool reverse);
+
+
+/**
+ * @brief Internal function building the int array corresponding to an obiblob containing a sequence.
+ *
+ * Each nucleotide is stored as a short int (int16_t).
+ *
+ * @param seq A pointer on the allocated int array.
+ * @param b A pointer on the obiblob containing the sequence.
+ * @param l The length of the (decoded) sequence.
+ * @param reverse A boolean indicating whether the sequence should be written reversed
+ *                (for the second sequence to align).
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+void putBlobInSeq(int16_t* seq, Obi_blob_p b, int l, bool reverse);
+
+
+/**
+ * @brief Internal function preparing an int array with the initial values for the alignment lengths before the alignment.
+ *
+ * The int array containing the initial alignment lengths (corresponding to the first line of the diagonalized band of the alignment matrix)
+ * needs to be initialized with external gap lengths before the alignment.
+ *
+ * @param address A pointer, aligned on a 16 bits boundary, on the int array where the initial values for the alignment length are to be stored.
+ * @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
+ * @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
+ * @param lmax The length of the longest sequence to align.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int lmax);
+
+
+/**
+ * @brief Internal function aligning two sequences, computing the lengths of their Longest Common Subsequence and of their alignment.
+ *
+ * @warning The first argument (seq1) must correspond to the longest sequence.
+ *
+ * @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
+ * @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
+ * @param l1 The length of the first sequence.
+ * @param l2 The length of the second sequence.
+ * @param normalize Whether the score should be normalized with the reference sequence length.
+ * @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
+ * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
+ * @param address A pointer, aligned on a 16 bits boundary, on an allocated int array where the initial values for the alignment length will be stored.
+ * @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
+ * @param lcs_length A pointer on the int where the LCS length will be stored.
+ * @param ali_length A pointer on the int where the alignment length will be stored.
+ *
+ * @returns The alignment score (normalized according to the parameters).
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length);
+
+
+
+/************************************************************************
+ *
+ * D E F I N I T I O N   O F   T H E   P R I V A T E   F U N C T I O N S
+ *
+ ************************************************************************/
+
+
 static void  printreg(__m128i r)
 {
 	int16_t a0,a1,a2,a3,a4,a5,a6,a7;
@@ -61,7 +292,6 @@ static inline int extract_reg(__m128i r, int p)
 }
 
 
-// TODO warning on length order
 void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length)
 {
 	register int j;
@@ -287,7 +517,6 @@ void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int
 }
 
 
-// TODO warning on length order
 void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length)
 {
 	register int j;
@@ -319,7 +548,7 @@ void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int
 	// Initialisations
 
 	odd_BLL = bandLengthLeft & 1;
-	even_BLL  = !odd_BLL;
+	even_BLL = !odd_BLL;
 
 	numberOfRegistersPerLine = bandLengthTotal / 8;
 	numberOfRegistersFor3Lines   = 3 * numberOfRegistersPerLine;
@@ -446,15 +675,14 @@ int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft)
 }
 
 
-// TODO that's gonna be fun to doc
-int calculateSizeToAllocate(int maxLen, int minLen, int LCSmin)
+int calculateSizeToAllocate(int maxLen, int LCSmin)
 {
 	int size;
 
 	size = calculateLeftBandLength(maxLen, LCSmin);
 
 	size *=  2;
-	size  =  (size & (~ (int)7)) + (( size & (int)7) ? 8:0); // Closest greater 8 multiple
+	size  =  (size & (~ (int)7)) + ((size & (int)7) ? 8:0); // Closest greater 8 multiple
 	size *=  3;
 	size +=  16;
 
@@ -522,13 +750,13 @@ void putBlobInSeq(int16_t* seq, Obi_blob_p b, int l, bool reverse)
 }
 
 
-void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int l1)
+void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int lmax)
 {
 	int i;
 	int address_00, x_address_10, address_01, address_01_shifted;
 	int numberOfRegistersPerLine;
 	int bm;
-	int value=INT16_MAX-l1;
+	int value=INT16_MAX-lmax;
 
 	numberOfRegistersPerLine = bandLengthTotal / 8;
 	bm = bandLengthLeft%2;
@@ -556,7 +784,6 @@ void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLe
 }
 
 
-// TODO warning on length order
 double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length)
 {
 	double id;
@@ -610,10 +837,14 @@ double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool n
 
 
 
-// PUBLIC FUNCTIONS
+/**********************************************************************
+ *
+ * D E F I N I T I O N   O F   T H E   P U B L I C   F U N C T I O N S
+ *
+ **********************************************************************/
 
 
-int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool similarity_mode)
+int calculateLCSmin(int lmax, int lmin, double threshold, bool normalize, int reference, bool similarity_mode)
 {
 	int LCSmin;
 
@@ -622,16 +853,16 @@ int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int refere
 		if (normalize)
 		{
 			if (reference == MINLEN)
-				LCSmin = threshold*l2;
+				LCSmin = threshold*lmin;
 			else 		// ref = maxlen or alilen
-				LCSmin = threshold*l1;
+				LCSmin = threshold*lmax;
 		}
 		else if (similarity_mode)
 			LCSmin = threshold;
 		else if (reference == MINLEN) // not similarity_mode
-			LCSmin = l2 - threshold;
+			LCSmin = lmin - threshold;
 		else	// not similarity_mode and ref = maxlen or alilen
-			LCSmin = l1 - threshold;
+			LCSmin = lmax - threshold;
 	}
 	else
 		LCSmin = 0;
@@ -679,7 +910,7 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
 	// Allocate space for matrix band if the alignment length must be computed
 	if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
 	{
-		sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
+		sizeToAllocateForBand = calculateSizeToAllocate(lmax, LCSmin);
 		address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
 		if (address == NULL)
 		{
@@ -774,13 +1005,13 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
 	// Allocate space for matrix band if the alignment length must be computed
 	if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
 	{
-		sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
+		sizeToAllocateForBand = calculateSizeToAllocate(lmax, LCSmin);
 		address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
 		if (address == NULL)
 		{
 			obi_set_errno(OBI_MALLOC_ERROR);
-			obidebug(1, "\nError getting a memory address aligned on 16 bytes boundary");
-			return 0;	// TODO DOUBLE_MIN
+			obidebug(1, "\nError getting a memory address aligned on a 16 bits boundary");
+			return 0;	// TODO DOUBLE_MIN to flag error
 		}
 	}
 
diff --git a/src/sse_banded_LCS_alignment.h b/src/sse_banded_LCS_alignment.h
index 23f3358..f46bf94 100644
--- a/src/sse_banded_LCS_alignment.h
+++ b/src/sse_banded_LCS_alignment.h
@@ -1,10 +1,15 @@
-/*
- * sse_banded_LCS_alignment.h
- *
- *  Created on: november 29, 2012
- *      Author: mercier
+/****************************************************************************
+ * LCS alignment of two sequences header file                               *
+ ****************************************************************************/
+
+/**
+ * @file sse_banded_LCS_alignment.h
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ * @date November 7th 2012
+ * @brief Header file for the functions handling the alignment of two sequences to compute their Longest Common Subsequence.
  */
 
+
 #ifndef SSE_BANDED_LCS_ALIGNMENT_H_
 #define SSE_BANDED_LCS_ALIGNMENT_H_
 
@@ -15,13 +20,97 @@
 #include "obiblob.h"
 
 
-#define ALILEN (0)	// TODO enum
+/**
+ * @brief Macros for reference lengths to use when aligning.
+ *
+ * @since 2012
+ * @author Eric Coissac (eric.coissac@metabarcoding.org)
+ */
+#define ALILEN (0)
 #define MAXLEN (1)
 #define MINLEN (2)
 
-// TODO doc
-int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool lcsmode);
+
+/**
+ * @brief Function calculating the minimum length of the Longest Common Subsequence between two sequences to be above a chosen score threshold.
+ *
+ * @warning The first argument (lmax) must correspond to the length of the longest sequence.
+ *
+ * @param lmax The length of the longest sequence to align.
+ * @param lmin The length of the shortest sequence to align.
+ * @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
+ * 					for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
+ * 					e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
+ * 					the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
+ *                  it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
+ * @param normalize Whether the score should be normalized with the reference sequence length.
+ * @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.	// TODO
+ * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
+ *
+ * @returns The minimum length of the Longest Common Subsequence between two sequences to be above the chosen score threshold.
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int calculateLCSmin(int lmax, int lmin, double threshold, bool normalize, int reference, bool similarity_mode);
+
+
+/**
+ * @brief Function aligning two sequences.
+ *
+ * The alignment algorithm is a banded global alignment algorithm, a modified version of the classical Needleman and Wunsch algorithm,
+ * and uses indices based on the length of the Longest Common Subsequence between the two sequences.
+ *
+ * Note: the sequences do not need to be ordered (i.e. the longest sequence does not have to be the first argument).
+ *
+ * @param seq1 A pointer on the character string corresponding to the first sequence.
+ * @param seq2 A pointer on the character string corresponding to the second sequence.
+ * @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
+ * 					for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
+ * 					e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
+ * 					the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
+ *                  it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
+ * @param normalize Whether the score should be normalized with the reference sequence length.
+ * @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.	// TODO
+ * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
+ * @param lcs_length A pointer on the int where the LCS length will be stored.
+ * @param ali_length A pointer on the int where the alignment length will be stored.
+ *
+ * @returns The alignment score (normalized according to the parameters).
+ *
+ * @since 2012
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
 double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
+
+
+/**
+ * @brief Function aligning two sequences encoded in obiblobs.
+ *
+ * The alignment algorithm is a banded global alignment algorithm, a modified version of the classical Needleman and Wunsch algorithm,
+ * and uses indices based on the length of the Longest Common Subsequence between the two sequences.
+ *
+ * Note: the obiblobs do not need to be ordered (i.e. the obiblob containing the longest sequence does not have to be the first argument).
+ *
+ * @param seq1 A pointer on the blob containing the first sequence.
+ * @param seq2 A pointer on the blob containing the second sequence.
+ * @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
+ * 					for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
+ * 					e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
+ * 					the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
+ *                  it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
+ * @param normalize Whether the score should be normalized with the reference sequence length.
+ * @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.	// TODO
+ * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
+ * @param lcs_length A pointer on the int where the LCS length will be stored.
+ * @param ali_length A pointer on the int where the alignment length will be stored.
+ *
+ * @returns The alignment score (normalized according to the parameters).
+ *
+ * @since December 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
 double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
 
+
 #endif

From 8e92bf6dacd5543d8120eac970cee2eeb4c0bff8 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Thu, 22 Dec 2016 17:06:23 +0100
Subject: [PATCH 10/22] LCS alignment: it is now checked that sequences are not
 longer than what a 16 bits integer can code for (as the LCS and alignment
 lengths are kept in 16 bits registers)

---
 src/sse_banded_LCS_alignment.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/sse_banded_LCS_alignment.c b/src/sse_banded_LCS_alignment.c
index 8790c6b..eab489b 100644
--- a/src/sse_banded_LCS_alignment.c
+++ b/src/sse_banded_LCS_alignment.c
@@ -900,6 +900,14 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
 		lmin = l1;
 	}
 
+	// Check that the sequences are not greater than what can be aligned using the 16 bits registers (as the LCS and alignment lengths are kept on 16 bits)
+	if (lmax > SHRT_MAX)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError: can not align sequences longer than %d (as the LCS and alignment lengths are kept on 16 bits)", SHRT_MAX);
+		return 0; 		// TODO DOUBLE_MIN to flag error
+	}
+
 	// If the score is expressed as a normalized distance, get the corresponding identity
 	if (!similarity_mode && normalize)
 		threshold = 1.0 - threshold;
@@ -995,6 +1003,14 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
 		lmin = l1;
 	}
 
+	// Check that the sequences are not greater than what can be aligned using the 16 bits registers (as the LCS and alignment lengths are kept on 16 bits)
+	if (lmax > SHRT_MAX)
+	{
+		obi_set_errno(OBI_ALIGN_ERROR);
+		obidebug(1, "\nError: can not align sequences longer than %d (as the LCS and alignment lengths are kept on 16 bits)", SHRT_MAX);
+		return 0; 		// TODO DOUBLE_MIN to flag error
+	}
+
 	// If the score is expressed as a normalized distance, get the corresponding identity
 	if (!similarity_mode && normalize)
 		threshold = 1.0 - threshold;

From 897032387f5159b9d8e6d19cc94a4ac8be77887c Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Thu, 5 Jan 2017 14:28:36 +0100
Subject: [PATCH 11/22] Taxonomy: reading merged.dmp file in taxdump

---
 src/obidms_taxonomy.c | 226 ++++++++++++++++++++++++++++++++++++++++--
 src/obidms_taxonomy.h |  35 +++++--
 2 files changed, 240 insertions(+), 21 deletions(-)

diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c
index 3eafd85..e096a8c 100644
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@@ -1041,6 +1041,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	FILE*             file;
 	bool 			  nodes_found=false;
 	bool			  names_found=false;
+	bool			  merged_found=false;
 	char 			  line[2048];			// TODO large enough?
 	char*			  elt;
 	char*			  file_name;
@@ -1049,9 +1050,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	int				  n;
 	char**			  rank_names;
 	int*			  parent_taxids;
-	int				  taxid;
+	int				  taxid, old_taxid;
 	bool			  already_in;
-	ecotx_t*		  t;
+	ecotx_t* 		  t;
 
 	// Initialize taxonomy structure
 	tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
@@ -1061,9 +1062,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 		obidebug(1, "\nError allocating the memory for a taxonomy structure");
 		return NULL;
 	}
-	tax->ranks = NULL;
-	tax->taxa  = NULL;
-	tax->names = NULL;
+	tax->ranks      = NULL;
+	tax->taxa       = NULL;
+	tax->names      = NULL;
+	tax->merged_idx = NULL;
 
 	tax->dms = NULL;
 	(tax->tax_name)[0] = '\0';
@@ -1312,6 +1314,204 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	closedir(tax_dir);
 
 
+	// Go through directory again for next file		// TODO make separate functions?
+	tax_dir = opendir(taxdump);
+	if (tax_dir == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nProblem opening a taxdump directory");
+		obi_close_taxonomy(tax);
+		free(parent_taxids);
+		free(rank_names);
+		return NULL;
+	}
+
+	// Go through taxonomy files
+	while ((dp = readdir(tax_dir)) != NULL)
+	{
+		if (strcmp(dp->d_name, "merged.dmp") == 0)
+		{
+			merged_found = true;	// TODO
+			buffer_size = 10000;
+
+			// Initializing the merged structure
+			tax->merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
+			if (tax->merged_idx == NULL)
+			{
+				obi_set_errno(OBI_MALLOC_ERROR);
+				obidebug(1, "\nError allocating the memory for a taxonomy structure");
+				obi_close_taxonomy(tax);
+				free(parent_taxids);
+				free(rank_names);
+				closedir(tax_dir);
+				return NULL;
+			}
+
+			// Allocating the memory for the file name
+			file_name =	(char*) malloc((strlen(taxdump) + 12)*sizeof(char));
+			if (file_name == NULL)
+			{
+				obi_set_errno(OBI_MALLOC_ERROR);
+				obidebug(1, "\nError allocating the memory for a file name");
+				obi_close_taxonomy(tax);
+				free(parent_taxids);
+				free(rank_names);
+				closedir(tax_dir);
+				return NULL;
+			}
+
+			// Build the file path
+			if (sprintf(file_name, "%s/merged.dmp", taxdump) < 0)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError building a taxonomy file name");
+				obi_close_taxonomy(tax);
+				closedir(tax_dir);
+				free(parent_taxids);
+				free(rank_names);
+				free(file_name);
+				return NULL;
+			}
+
+			file = fopen(file_name, "r");
+			if (file == NULL)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nProblem opening a taxonomy file");
+				obi_close_taxonomy(tax);
+				closedir(tax_dir);
+				free(parent_taxids);
+				free(rank_names);
+				free(file_name);
+				return NULL;
+			}
+
+			free(file_name);
+
+			n = 0;
+			i = 0;
+			while (fgets(line, sizeof(line), file))
+			{
+				// Check for terminal '\n' character (line complete)
+				if (line[strlen(line) - 1] != '\n')
+				{
+					obi_set_errno(OBI_TAXONOMY_ERROR);
+					obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
+					obi_close_taxonomy(tax);
+					fclose(file);
+					closedir(tax_dir);
+					free(parent_taxids);
+					free(rank_names);
+					return NULL;
+				}
+
+				// Parse the 2 elements separated by '|'
+
+				// Get first element
+				elt = strtok(line, "|");
+				// Remove the last character (tab character)
+				elt[strlen(elt)-1] = '\0';
+				// First element: old deprecated taxid
+				old_taxid = atoi(elt);
+
+				// Get 2nd element: new taxid
+				elt = strtok(NULL, "|");
+				// Remove the first and the last characters (tab characters)
+				elt = elt+1;
+				elt[strlen(elt)-1] = '\0';
+				taxid = atoi(elt);
+
+				// Store the old taxid in the merged_idx ordered taxid list
+					// First, store the taxids from the current taxonomy that come before
+				while ((i < (tax->taxa)->count) && ((tax->taxa)->taxon[i].taxid < old_taxid))
+				{
+					// Enlarge structures if needed
+					if (n == buffer_size)
+					{
+						buffer_size = buffer_size * 2;
+						tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
+						if (tax->merged_idx == NULL)
+						{
+							obi_set_errno(OBI_MALLOC_ERROR);
+							obidebug(1, "\nError reallocating memory for a taxonomy structure");
+							obi_close_taxonomy(tax);
+							fclose(file);
+							closedir(tax_dir);
+							free(parent_taxids);
+							free(rank_names);
+							return NULL;
+						}
+					}
+
+					(tax->merged_idx)->merged[n].taxid = (tax->taxa)->taxon[i].taxid;
+					(tax->merged_idx)->merged[n].idx = i;
+					i++;
+					n++;
+				}
+
+				// Enlarge structures if needed
+				if (n == buffer_size)
+				{
+					buffer_size = buffer_size * 2;
+					tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
+					if (tax->merged_idx == NULL)
+					{
+						obi_set_errno(OBI_MALLOC_ERROR);
+						obidebug(1, "\nError reallocating memory for a taxonomy structure");
+						obi_close_taxonomy(tax);
+						fclose(file);
+						closedir(tax_dir);
+						free(parent_taxids);
+						free(rank_names);
+						return NULL;
+					}
+				}
+
+					// Store the deprecated taxid with the index that refers to the new taxid
+						// Find the index of the new taxid
+				t = obi_taxo_get_taxon_with_taxid(tax, taxid);
+						// Store the old taxid with the index
+				(tax->merged_idx)->merged[n].taxid = old_taxid;
+				(tax->merged_idx)->merged[n].idx = t->idx;
+				n++;
+			}
+
+			// Check that fgets stopped because it reached EOF
+			if (!feof(file))
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError: file reading was stopped before end of file");
+				obi_close_taxonomy(tax);
+				fclose(file);
+				closedir(tax_dir);
+				free(parent_taxids);
+				free(rank_names);
+				return NULL;
+			}
+
+			// Store count
+			(tax->merged_idx)->count = n;
+
+			// Truncate the structure memory to the right size
+			tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * (tax->merged_idx)->count);
+			if (tax->merged_idx == NULL)
+			{
+				obi_set_errno(OBI_MALLOC_ERROR);
+				obidebug(1, "\nError reallocating memory for a a taxonomy structure");
+				obi_close_taxonomy(tax);
+				fclose(file);
+				closedir(tax_dir);
+				free(parent_taxids);
+				free(rank_names);
+				return NULL;
+			}
+
+			fclose(file);
+		}
+	}
+	closedir(tax_dir);
+
+
 	// Go through directory again for next file
 	tax_dir = opendir(taxdump);
 	if (tax_dir == NULL)
@@ -1346,7 +1546,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			}
 
 			// Allocating the memory for the file name
-			file_name =	(char*) malloc((strlen(taxdump) + 10)*sizeof(char));
+			file_name =	(char*) malloc((strlen(taxdump) + 11)*sizeof(char));
 			if (file_name == NULL)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
@@ -1684,7 +1884,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 
 	(tax->taxa)->buffer_size = (tax->taxa)->count;
 
-	// Compute longest branches    TODO what is this for???
+	// Compute longest branches
 	for (i=0; i < (tax->taxa)->count; i++)
 	{
 		t = (((tax->taxa))->taxon)+i;
@@ -1844,9 +2044,10 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
  		return NULL;
 	}
 
-	tax->ranks = NULL;
-	tax->taxa  = NULL;
-	tax->names = NULL;
+	tax->ranks      = NULL;
+	tax->taxa       = NULL;
+	tax->names      = NULL;
+	tax->merged_idx = NULL;
 
 	tax->dms = dms;
 
@@ -2028,6 +2229,11 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
 			free(taxonomy->taxa);
 		}
 
+		if (taxonomy->merged_idx)
+		{
+			free(taxonomy->merged_idx);
+		}
+
 		free(taxonomy);
 	}
 
diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h
index e70e892..543a257 100644
--- a/src/obidms_taxonomy.h
+++ b/src/obidms_taxonomy.h
@@ -26,12 +26,12 @@ typedef struct {
 	int32_t  rank;
 	int32_t	 parent;
 	int32_t  name_length;
-	char     name[1];
+	char     name[];
 } ecotxformat_t;
 
 
 typedef struct ecotxnode {
-	int32_t           taxid;
+	int32_t           taxid;	// TODO discuss that this is will be the current taxid even if the struct was accessed through a deprecated one
 	int32_t           rank;
 	int32_t  		  farest;
 	int32_t			  idx;
@@ -47,13 +47,13 @@ typedef struct {
 	int32_t local_count;
 	int32_t max_taxid;
 	int32_t buffer_size;
-	ecotx_t taxon[1];
+	ecotx_t taxon[];
 } ecotxidx_t;
 
 
 typedef struct {
 	int32_t count;
-	char*   label[1];
+	char*   label[];
 } ecorankidx_t;
 
 
@@ -62,7 +62,7 @@ typedef struct {
 	int32_t  name_length;
 	int32_t  class_length;
 	int32_t  taxid;	// taxid idx
-	char     names[1];
+	char     names[];
 } econameformat_t;
 
 
@@ -76,16 +76,29 @@ typedef struct {
 
 typedef struct {
 	int32_t   count;
-	econame_t names[1];
+	econame_t names[];
 } econameidx_t;
 
 
+typedef struct {
+	int32_t taxid;
+	int32_t idx;
+} ecomerged_t;
+
+
+typedef struct {
+	int32_t     count;
+	ecomerged_t merged[];
+} ecomergedidx_t;
+
+
 typedef struct OBIDMS_taxonomy_t {
-	char          tax_name[TAX_NAME_LEN];
-	OBIDMS_p      dms;
-	ecorankidx_t* ranks;
-	econameidx_t* names;
-	ecotxidx_t*   taxa;
+	char            tax_name[TAX_NAME_LEN];
+	OBIDMS_p        dms;
+	ecomergedidx_t* merged_idx;
+	ecorankidx_t*   ranks;
+	econameidx_t*   names;
+	ecotxidx_t*     taxa;
 } OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
 
 

From f396625f981dfe9c38e4b97dd90384dc80fd1f1d Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Thu, 5 Jan 2017 15:37:13 +0100
Subject: [PATCH 12/22] Taxonomy: function to write *.adx files

---
 src/obidms_taxonomy.c | 99 ++++++++++++++++++++++++++++++++++++++++++-
 src/obidms_taxonomy.h |  5 ---
 2 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c
index e096a8c..bf9d773 100644
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@@ -996,6 +996,101 @@ int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
 }
 
 
+int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
+{
+	int     i;
+	char* 	file_name;
+	int 	file_descriptor;
+	off_t 	file_size;
+	char*   taxonomy_path;
+
+	// Compute file size
+	file_size = sizeof(int32_t) + (sizeof(int32_t) * 3 * (tax->merged_idx)->count);
+
+	// Build the taxonomy directory path
+	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
+
+	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
+	if (file_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
+		return -1;
+	}
+
+	// Build the file path
+	if (sprintf(file_name, "%s/%s.adx", taxonomy_path, taxonomy_name) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError building a binary taxonomy file name");
+		return -1;
+	}
+
+	free(taxonomy_path);
+
+	// Create file
+	file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
+	if (file_descriptor < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError creating a binary taxonomy file %s", file_name);
+		free(file_name);
+		return -1;
+	}
+
+	free(file_name);
+
+	// Truncate the file to the right size
+	if (ftruncate(file_descriptor, file_size) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError truncating a binary taxonomy file");
+		close(file_descriptor);
+		return -1;
+	}
+
+	// Write merged indices count
+	if (write(file_descriptor, &((tax->merged_idx)->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError writing in a binary taxonomy file");
+		close(file_descriptor);
+		return -1;
+	}
+
+	// Write merged indices
+	for (i=0; i < (tax->merged_idx)->count; i++)
+	{
+		// Write taxid
+		if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+
+		// Write index corresponding to the taxid in the ecotxidx_t structure
+		if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+	}
+
+	// Close file
+	if (close(file_descriptor) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError closing a taxonomy file file");
+		return -1;
+	}
+
+	return 0;
+}
+
 
 int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name)
 {
@@ -1024,6 +1119,8 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
          return -1;
      if (write_nameidx(dms, tax, tax_name) < 0)
          return -1;
+     if (write_mergedidx(dms, tax, tax_name) < 0)
+         return -1;
      // Check if there are local taxa (if so last taxon is local)
      if ((tax->taxa)->local_count > 0)
     	 if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
@@ -1331,7 +1428,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	{
 		if (strcmp(dp->d_name, "merged.dmp") == 0)
 		{
-			merged_found = true;	// TODO
+			merged_found = true;
 			buffer_size = 10000;
 
 			// Initializing the merged structure
diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h
index 543a257..48d0293 100644
--- a/src/obidms_taxonomy.h
+++ b/src/obidms_taxonomy.h
@@ -122,11 +122,6 @@ ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
 
 ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
 
-
-int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
-int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
-int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
-
 int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
 
 OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);

From d68374018bf43eedee8cb3e1500a350b6f308694 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Fri, 6 Jan 2017 15:52:21 +0100
Subject: [PATCH 13/22] Taxonomy: functions to read the *.adx file (containing
 the deprecated and current taxids and their corresponding indices in the taxa
 structure) and to find the taxa using the merged index.

---
 src/obidms_taxonomy.c | 140 +++++++++++++++++++++++++++++++++++++++---
 src/obidms_taxonomy.h |   1 +
 2 files changed, 132 insertions(+), 9 deletions(-)

diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c
index bf9d773..2779699 100644
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@@ -35,7 +35,7 @@ int cmp_rank_labels(const void* label1, const void* label2)
 }
 
 
-static int cmp_taxids(const void* ptaxid, const void* ptaxon)
+static int cmp_taxids_in_ecotx_t(const void* ptaxid, const void* ptaxon)
 {
   ecotx_t* current_taxon = (ecotx_t*) ptaxon;
   int32_t  taxid = (int32_t) ((size_t) ptaxid);
@@ -43,6 +43,14 @@ static int cmp_taxids(const void* ptaxid, const void* ptaxon)
 }
 
 
+static int cmp_taxids_in_ecomerged_t(const void* ptaxid, const void* ptaxon)
+{
+  ecomerged_t* current_taxon = (ecomerged_t*) ptaxon;
+  int32_t  taxid = (int32_t) ((size_t) ptaxid);
+  return taxid - current_taxon->taxid;
+}
+
+
 static int cmp_str(const void* s1, const void* s2)
 {
     return strcmp(*((char**)s1), *((char**)s2));
@@ -467,6 +475,55 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 
 
 
+ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
+{
+	int32_t      		count;
+	FILE*				f;
+	ecomergedidx_t*		index_merged_idx;
+	ecomerged_t*		merged_idx;
+	int32_t      		i;
+	int32_t             record_length;
+
+	f = open_ecorecorddb(file_name, &count, 0);
+	if (f == NULL)
+	{
+ 		obidebug(1, "\nError reading taxonomy name file");
+ 		return NULL;
+	}
+
+	index_merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + (sizeof(ecomerged_t) * count));
+	if (index_merged_idx == NULL)
+	{
+ 		obi_set_errno(OBI_MALLOC_ERROR);
+ 		obidebug(1, "\nError reading taxonomy name file");
+ 		return NULL;
+	}
+
+	index_merged_idx->count = count;
+
+	for (i=0; i < count; i++)
+	{
+		merged_idx = read_ecorecord(f, &record_length);
+		memcpy((index_merged_idx->merged)+i, merged_idx, record_length);
+		if ((index_merged_idx->merged)+i == NULL)
+		{
+	 		obi_set_errno(OBI_TAXONOMY_ERROR);
+	 		obidebug(1, "\nError reading taxonomy name file");
+	 		free(index_merged_idx);
+	 		return NULL;
+		}
+	}
+
+	fclose(f);
+
+	return index_merged_idx;
+}
+
+
+
+
+
+
 
 
 // Functions to write taxonomy structure to binary files
@@ -1003,6 +1060,7 @@ int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_na
 	int 	file_descriptor;
 	off_t 	file_size;
 	char*   taxonomy_path;
+	int32_t record_size;
 
 	// Compute file size
 	file_size = sizeof(int32_t) + (sizeof(int32_t) * 3 * (tax->merged_idx)->count);
@@ -1058,9 +1116,20 @@ int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_na
 		return -1;
 	}
 
+	record_size = 2 * sizeof(int32_t);
+
 	// Write merged indices
 	for (i=0; i < (tax->merged_idx)->count; i++)
 	{
+		// Write record size
+		if (write(file_descriptor, &(record_size), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+
 		// Write taxid
 		if (write(file_descriptor, &(((tax->merged_idx)->merged)[i].taxid), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
 		{
@@ -1566,7 +1635,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 
 					// Store the deprecated taxid with the index that refers to the new taxid
 						// Find the index of the new taxid
-				t = obi_taxo_get_taxon_with_taxid(tax, taxid);
+				t = obi_taxo_get_taxon_with_current_taxid(tax, taxid);
 						// Store the old taxid with the index
 				(tax->merged_idx)->merged[n].taxid = old_taxid;
 				(tax->merged_idx)->merged[n].idx = t->idx;
@@ -1966,7 +2035,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	// Associate the taxa with their parent
 	for (i=0; i < (tax->taxa)->count; i++)
 	{
-		((tax->taxa)->taxon)[i].parent = obi_taxo_get_taxon_with_taxid(tax, parent_taxids[i]);
+		((tax->taxa)->taxon)[i].parent = obi_taxo_get_taxon_with_current_taxid(tax, parent_taxids[i]);
 		if (((tax->taxa)->taxon)[i].parent == NULL)
 		{
 			obi_set_errno(OBI_TAXONOMY_ERROR);
@@ -2129,6 +2198,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 	char*			   taxonomy_path;
 	char*  			   ranks_file_name;
 	char*              taxa_file_name;
+	char*              merged_idx_file_name;
 	char*			   local_taxa_file_name;
 	char*			   alter_names_file_name;
 	int                buffer_size;
@@ -2238,6 +2308,35 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 	free(taxa_file_name);
 	free(local_taxa_file_name);
 
+	// Read merged index (old and current taxids referring to indices in the taxa structure)
+	merged_idx_file_name = (char*) malloc(buffer_size*sizeof(char));
+	if (merged_idx_file_name == NULL)
+	{
+ 		obi_set_errno(OBI_MALLOC_ERROR);
+ 		obidebug(1, "\nError allocating memory for merged index file name");
+		free(taxonomy_path);
+		obi_close_taxonomy(tax);
+		return NULL;
+	}
+	if (snprintf(merged_idx_file_name, buffer_size, "%s/%s.adx", taxonomy_path, taxonomy_name) < 0)
+	{
+ 		obi_set_errno(OBI_TAXONOMY_ERROR);
+ 		obidebug(1, "\nError building merged index file name");
+		free(taxonomy_path);
+		free(merged_idx_file_name);
+		obi_close_taxonomy(tax);
+		return NULL;
+	}
+    tax->merged_idx = read_mergedidx(merged_idx_file_name, tax);
+	if (tax->merged_idx == NULL)
+	{
+		free(taxonomy_path);
+		free(merged_idx_file_name);
+		obi_close_taxonomy(tax);
+		return NULL;
+	}
+    free(merged_idx_file_name);
+
 	// Read alternative names
 	if (read_alternative_names)
 	{
@@ -2363,10 +2462,10 @@ ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
 }
 
 
-ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
+ecotx_t* obi_taxo_get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)	// TODO discuss keeping private?
 {
-	ecotx_t    *current_taxon;
-	int32_t     count;
+	ecotx_t *current_taxon;
+	int32_t  count;
 
 	count = (taxonomy->taxa)->count;
 
@@ -2374,12 +2473,35 @@ ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid
                                        (const void *) taxonomy->taxa->taxon,
                                        count,
                                        sizeof(ecotx_t),
-                                       cmp_taxids);
+									   cmp_taxids_in_ecotx_t);
 	return current_taxon;
 }
 
 
-bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)
+ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)	// Finds the taxon for a taxid through the merged index (an entry's idx is a position in taxonomy->taxa->taxon); returns NULL if the taxid is not in the index
+{
+	ecotx_t     *current_taxon;
+	ecomerged_t *indexed_taxon;
+	int32_t      count;
+
+	count = (taxonomy->merged_idx)->count;	// number of entries in the merged index
+
+	indexed_taxon = (ecomerged_t*) bsearch((const void *) ((size_t) taxid),	// the key pointer carries the taxid value itself, not an address
+                                       	   (const void *) taxonomy->merged_idx->merged,
+										   count,
+										   sizeof(ecomerged_t),
+										   cmp_taxids_in_ecomerged_t);	// binary search: relies on the merged list being ordered by taxid
+
+	if (indexed_taxon == NULL)
+		current_taxon = NULL;
+	else
+		current_taxon = (taxonomy->taxa->taxon)+(indexed_taxon->idx);
+
+	return current_taxon;
+}
+
+
+bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)		// TODO discuss that this doesn't work with deprecated taxids
 {
 	ecotx_t* next_parent;
 
@@ -2486,7 +2608,7 @@ ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 {
 	static OBIDMS_taxonomy_p tax = NULL;
-	static int32_t		  rankindex = -1;
+	static int32_t		     rankindex = -1;
 
 	if (taxonomy && (tax != taxonomy))
 	{
diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h
index 48d0293..33d8aba 100644
--- a/src/obidms_taxonomy.h
+++ b/src/obidms_taxonomy.h
@@ -108,6 +108,7 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
 
 ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
 
+ecotx_t* obi_taxo_get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
 ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
 
 bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);

From 41ad3deec039f899feff8b785f6212999cbf2597 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Mon, 9 Jan 2017 17:28:49 +0100
Subject: [PATCH 14/22] Taxonomy: information about deleted taxids is now read
 from delnodes.dmp file and added to *.adx file

---
 src/obidms_taxonomy.c | 258 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 228 insertions(+), 30 deletions(-)

diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c
index 2779699..c70f788 100644
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@@ -1208,12 +1208,15 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	bool 			  nodes_found=false;
 	bool			  names_found=false;
 	bool			  merged_found=false;
+	bool 			  delnodes_found=false;
+	int32_t*          delnodes=NULL;
+	int32_t			  delnodes_count;
 	char 			  line[2048];			// TODO large enough?
 	char*			  elt;
 	char*			  file_name;
 	int				  buffer_size;
 	int			      i, j;
-	int				  n;
+	int				  n, nD, nT;
 	char**			  rank_names;
 	int*			  parent_taxids;
 	int				  taxid, old_taxid;
@@ -1495,14 +1498,14 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	// Go through taxonomy files
 	while ((dp = readdir(tax_dir)) != NULL)
 	{
-		if (strcmp(dp->d_name, "merged.dmp") == 0)
+		if (strcmp(dp->d_name, "delnodes.dmp") == 0)
 		{
-			merged_found = true;
+			delnodes_found = true;
 			buffer_size = 10000;
 
-			// Initializing the merged structure
-			tax->merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
-			if (tax->merged_idx == NULL)
+			// Initializing the list of deleted nodes
+			delnodes = (int32_t*) malloc(sizeof(int32_t) * buffer_size);
+			if (delnodes == NULL)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError allocating the memory for a taxonomy structure");
@@ -1522,6 +1525,156 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				obi_close_taxonomy(tax);
 				free(parent_taxids);
 				free(rank_names);
+				free(delnodes);
+				closedir(tax_dir);
+				return NULL;
+			}
+
+			// Build the file path
+			if (sprintf(file_name, "%s/delnodes.dmp", taxdump) < 0)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError building a taxonomy file name");
+				obi_close_taxonomy(tax);
+				closedir(tax_dir);
+				free(parent_taxids);
+				free(rank_names);
+				free(file_name);
+				free(delnodes);
+				return NULL;
+			}
+
+			file = fopen(file_name, "r");
+			if (file == NULL)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nProblem opening a taxonomy file");
+				obi_close_taxonomy(tax);
+				closedir(tax_dir);
+				free(parent_taxids);
+				free(rank_names);
+				free(file_name);
+				free(delnodes);
+				return NULL;
+			}
+
+			free(file_name);
+
+			n = 0;
+			while (fgets(line, sizeof(line), file))
+			{
+				// Check for terminal '\n' character (line complete)
+				if (line[strlen(line) - 1] != '\n')
+				{
+					obi_set_errno(OBI_TAXONOMY_ERROR);
+					obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
+					obi_close_taxonomy(tax);
+					fclose(file);
+					closedir(tax_dir);
+					free(parent_taxids);
+					free(rank_names);
+					free(delnodes);
+					return NULL;
+				}
+
+				// Get first and only element of the line (the deprecated taxid)
+				elt = strtok(line, "|");
+				// Remove the last character (tab character)
+				elt[strlen(elt)-1] = '\0';
+				// First element: old deprecated taxid
+				old_taxid = atoi(elt);
+
+				// Store the old taxid in the list of deleted taxids
+					// Enlarge array if needed
+				if (n == buffer_size)
+				{
+					buffer_size = buffer_size * 2;
+					delnodes = (int32_t*) realloc(tax->merged_idx, sizeof(int32_t) * buffer_size);
+					if (delnodes == NULL)
+					{
+						obi_set_errno(OBI_MALLOC_ERROR);
+						obidebug(1, "\nError reallocating memory for a taxonomy structure");
+						obi_close_taxonomy(tax);
+						fclose(file);
+						closedir(tax_dir);
+						free(parent_taxids);
+						free(rank_names);
+						return NULL;
+					}
+				}
+
+				delnodes[n] = old_taxid;
+				n++;
+			}
+
+			// Check that fgets stopped because it reached EOF
+			if (!feof(file))
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError: file reading was stopped before end of file");
+				obi_close_taxonomy(tax);
+				fclose(file);
+				closedir(tax_dir);
+				free(parent_taxids);
+				free(rank_names);
+				free(delnodes);
+				return NULL;
+			}
+
+			// Store count
+			delnodes_count = n;
+
+			fclose(file);
+		}
+	}
+	closedir(tax_dir);
+
+
+	// Go through directory again for next file		// TODO make separate functions?
+	tax_dir = opendir(taxdump);
+	if (tax_dir == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nProblem opening a taxdump directory");
+		obi_close_taxonomy(tax);
+		free(parent_taxids);
+		free(rank_names);
+		free(delnodes);
+		return NULL;
+	}
+
+	// Go through taxonomy files
+	while ((dp = readdir(tax_dir)) != NULL)
+	{
+		if (strcmp(dp->d_name, "merged.dmp") == 0)
+		{
+			merged_found = true;
+			buffer_size = 10000;
+
+			// Initializing the merged structure
+			tax->merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
+			if (tax->merged_idx == NULL)
+			{
+				obi_set_errno(OBI_MALLOC_ERROR);
+				obidebug(1, "\nError allocating the memory for a taxonomy structure");
+				obi_close_taxonomy(tax);
+				free(parent_taxids);
+				free(rank_names);
+				free(delnodes);
+				closedir(tax_dir);
+				return NULL;
+			}
+
+			// Allocating the memory for the file name
+			file_name =	(char*) malloc((strlen(taxdump) + 12)*sizeof(char));
+			if (file_name == NULL)
+			{
+				obi_set_errno(OBI_MALLOC_ERROR);
+				obidebug(1, "\nError allocating the memory for a file name");
+				obi_close_taxonomy(tax);
+				free(parent_taxids);
+				free(rank_names);
+				free(delnodes);
 				closedir(tax_dir);
 				return NULL;
 			}
@@ -1536,6 +1689,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				free(parent_taxids);
 				free(rank_names);
 				free(file_name);
+				free(delnodes);
 				return NULL;
 			}
 
@@ -1549,13 +1703,15 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				free(parent_taxids);
 				free(rank_names);
 				free(file_name);
+				free(delnodes);
 				return NULL;
 			}
 
 			free(file_name);
 
-			n = 0;
-			i = 0;
+			nT = 0;					// to point in current taxa list while merging
+			nD = delnodes_count-1;	// to point in deleted taxids list while merging (going from count-1 to 0 because taxids are sorted in descending order)
+			n = 0;					// to point in final merged list while merging
 			while (fgets(line, sizeof(line), file))
 			{
 				// Check for terminal '\n' character (line complete)
@@ -1568,6 +1724,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 					closedir(tax_dir);
 					free(parent_taxids);
 					free(rank_names);
+					free(delnodes);
 					return NULL;
 				}
 
@@ -1588,34 +1745,68 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				taxid = atoi(elt);
 
 				// Store the old taxid in the merged_idx ordered taxid list
-					// First, store the taxids from the current taxonomy that come before
-				while ((i < (tax->taxa)->count) && ((tax->taxa)->taxon[i].taxid < old_taxid))
+				// The merged list is an ordered list of the current taxids, the deprecated taxids that have current references,
+				// and the deleted taxids with no current reference. An element of the list is composed of the taxid, and the index
+				// of the taxon in the taxa structure, or -1 for deleted taxids.
+				// Creating the merged list requires to merge the 3 ordered lists into one.
+				while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) && ((nD >= 0) && (delnodes[nD] < old_taxid)))
 				{
-					// Enlarge structures if needed
-					if (n == buffer_size)
-					{
-						buffer_size = buffer_size * 2;
-						tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
-						if (tax->merged_idx == NULL)
+					if ((tax->taxa)->taxon[nT].taxid < delnodes[nD])
+					{ // Add element from taxa list
+						// Enlarge structure if needed
+						if (n == buffer_size)
 						{
-							obi_set_errno(OBI_MALLOC_ERROR);
-							obidebug(1, "\nError reallocating memory for a taxonomy structure");
-							obi_close_taxonomy(tax);
-							fclose(file);
-							closedir(tax_dir);
-							free(parent_taxids);
-							free(rank_names);
-							return NULL;
+							buffer_size = buffer_size * 2;
+							tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
+							if (tax->merged_idx == NULL)
+							{
+								obi_set_errno(OBI_MALLOC_ERROR);
+								obidebug(1, "\nError reallocating memory for a taxonomy structure");
+								obi_close_taxonomy(tax);
+								fclose(file);
+								closedir(tax_dir);
+								free(parent_taxids);
+								free(rank_names);
+								free(delnodes);
+								return NULL;
+							}
 						}
-					}
 
-					(tax->merged_idx)->merged[n].taxid = (tax->taxa)->taxon[i].taxid;
-					(tax->merged_idx)->merged[n].idx = i;
-					i++;
-					n++;
+						(tax->merged_idx)->merged[n].taxid = (tax->taxa)->taxon[nT].taxid;
+						(tax->merged_idx)->merged[n].idx = nT;
+						nT++;
+						n++;
+					}
+					else if (delnodes[nD] < (tax->taxa)->taxon[nT].taxid)
+					{ // Add element from deleted taxids list
+						// Enlarge structure if needed
+						if (n == buffer_size)
+						{
+							buffer_size = buffer_size * 2;
+							tax->merged_idx = (ecomergedidx_t*) realloc(tax->merged_idx, sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
+							if (tax->merged_idx == NULL)
+							{
+								obi_set_errno(OBI_MALLOC_ERROR);
+								obidebug(1, "\nError reallocating memory for a taxonomy structure");
+								obi_close_taxonomy(tax);
+								fclose(file);
+								closedir(tax_dir);
+								free(parent_taxids);
+								free(rank_names);
+								free(delnodes);
+								return NULL;
+							}
+						}
+
+						(tax->merged_idx)->merged[n].taxid = delnodes[nD];
+						(tax->merged_idx)->merged[n].idx = -1;	// The index to tag deleted taxids is -1
+						nD--;
+						n++;
+					}
 				}
 
-				// Enlarge structures if needed
+				// Add the deprecated taxid
+				// Enlarge structure if needed
 				if (n == buffer_size)
 				{
 					buffer_size = buffer_size * 2;
@@ -1629,6 +1820,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 						closedir(tax_dir);
 						free(parent_taxids);
 						free(rank_names);
+						free(delnodes);
 						return NULL;
 					}
 				}
@@ -1675,6 +1867,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			fclose(file);
 		}
 	}
+
+	// Free delnodes array, not needed anymore
+	free(delnodes);
+
 	closedir(tax_dir);
 
 
@@ -2494,6 +2690,8 @@ ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid
 
 	if (indexed_taxon == NULL)
 		current_taxon = NULL;
+	else if (indexed_taxon->idx == -1)
+		current_taxon = NULL;	// TODO discuss what to do when old deleted taxon
 	else
 		current_taxon = (taxonomy->taxa->taxon)+(indexed_taxon->idx);
 

From 0385a92e02c8c66d2d45fd880ac2c7a8f25071f2 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Wed, 11 Jan 2017 16:36:08 +0100
Subject: [PATCH 15/22] Taxonomy: Refactored the taxdump reading, and little
 fixes

---
 src/obidms_taxonomy.c | 769 ++++++++++++++++++++++--------------------
 1 file changed, 410 insertions(+), 359 deletions(-)

diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c
index c70f788..6295be8 100644
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@@ -259,7 +259,7 @@ ecorankidx_t* read_rankidx(const char* ranks_file_name)
 	if (ranks_file==NULL)
 		return NULL;
 
-	ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * (count-1));
+	ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * count);
 	if (ranks_index == NULL)
 	{
  		obi_set_errno(OBI_MALLOC_ERROR);
@@ -321,7 +321,7 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
 
 	f_local_taxa = open_ecorecorddb(local_taxa_file_name, &count_local_taxa, 0);
 
-	taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa - 1));
+	taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa));
 	if (taxa_index == NULL)
 	{
  		obi_set_errno(OBI_MALLOC_ERROR);
@@ -445,7 +445,7 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
  		return NULL;
 	}
 
-	index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * (count-1));
+	index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
 	if (index_names == NULL)
 	{
  		obi_set_errno(OBI_MALLOC_ERROR);
@@ -548,7 +548,7 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
 	// Build the taxonomy directory path
 	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
 
-	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char));
+	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
 	if (file_name == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
@@ -622,8 +622,8 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
 	// Close file
 	if (close(file_descriptor) < 0)
 	{
-		obi_set_errno(OBIDMS_UNKNOWN_ERROR);
-		obidebug(1, "\nError closing a DMS information file");
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError closing an rdx taxonomy file");
 		return -1;
 	}
 
@@ -652,7 +652,7 @@ int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_
 	// Build the taxonomy directory path
 	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
 
-	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char));
+	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
 	if (file_name == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
@@ -759,8 +759,8 @@ int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_
 	// Close file
 	if (close(file_descriptor) < 0)
 	{
-		obi_set_errno(OBIDMS_UNKNOWN_ERROR);
-		obidebug(1, "\nError closing a DMS information file");
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError closing a tdx taxonomy file");
 		return -1;
 	}
 
@@ -789,7 +789,7 @@ int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* ta
 	// Build the taxonomy directory path
 	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
 
-	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char));
+	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
 	if (file_name == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
@@ -896,8 +896,8 @@ int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* ta
 	// Close file
 	if (close(file_descriptor) < 0)
 	{
-		obi_set_errno(OBIDMS_UNKNOWN_ERROR);
-		obidebug(1, "\nError closing a DMS information file");
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError closing a ldx taxonomy file");
 		return -1;
 	}
 
@@ -928,7 +928,7 @@ int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
 	// Build the taxonomy directory path
 	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
 
-	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 5)*sizeof(char));
+	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));
 	if (file_name == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
@@ -1045,7 +1045,7 @@ int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
 	if (close(file_descriptor) < 0)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError closing a DMS information file");
+		obidebug(1, "\nError closing a ndx taxonomy file");
 		return -1;
 	}
 
@@ -1153,7 +1153,7 @@ int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_na
 	if (close(file_descriptor) < 0)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError closing a taxonomy file file");
+		obidebug(1, "\nError closing an adx taxonomy file");
 		return -1;
 	}
 
@@ -1199,64 +1199,56 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
 }
 
 
-OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
-{
-	OBIDMS_taxonomy_p tax;
-	struct dirent*    dp;
-	DIR*              tax_dir;
-	FILE*             file;
-	bool 			  nodes_found=false;
-	bool			  names_found=false;
-	bool			  merged_found=false;
-	bool 			  delnodes_found=false;
-	int32_t*          delnodes=NULL;
-	int32_t			  delnodes_count;
-	char 			  line[2048];			// TODO large enough?
-	char*			  elt;
-	char*			  file_name;
-	int				  buffer_size;
-	int			      i, j;
-	int				  n, nD, nT;
-	char**			  rank_names;
-	int*			  parent_taxids;
-	int				  taxid, old_taxid;
-	bool			  already_in;
-	ecotx_t* 		  t;
 
-	// Initialize taxonomy structure
-	tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
-	if (tax == NULL)
+int read_nodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, 	char***	rank_names_p, int** parent_taxids_p)
+{
+	struct dirent* 	dp;
+	DIR*           	tax_dir;
+	FILE*          	file;
+	char*			file_name;
+	bool           	file_found=false;
+	char 			line[2048];			// TODO large enough?
+	char*			elt;
+	int				buffer_size;
+	int			    i, n;
+
+	buffer_size = 10000;
+
+	// Initialize rank names and parent taxids arrays
+	*parent_taxids_p = malloc(buffer_size * sizeof(int));
+	if (*parent_taxids_p == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError allocating the memory for a taxonomy structure");
-		return NULL;
+		obidebug(1, "\nError allocating the memory for the parent taxids array");
+		return -1;
 	}
-	tax->ranks      = NULL;
-	tax->taxa       = NULL;
-	tax->names      = NULL;
-	tax->merged_idx = NULL;
 
-	tax->dms = NULL;
-	(tax->tax_name)[0] = '\0';
-
-	// TODO check if taxdump path is for a gz file to unzip or a directory
+	*rank_names_p = malloc(buffer_size * sizeof(char*));
+	if (*rank_names_p == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating the memory for the rank names array");
+		free(*parent_taxids_p);
+		return -1;
+	}
 
+	// Open the taxdump directory
 	tax_dir = opendir(taxdump);
 	if (tax_dir == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
 		obidebug(1, "\nProblem opening a taxdump directory");
-		free(tax);
-		return NULL;
+		free(*parent_taxids_p);
+		free(*rank_names_p);
+		return -1;
 	}
 
-	// Go through taxonomy files
+	// Look for the 'nodes.dmp' file
 	while ((dp = readdir(tax_dir)) != NULL)
 	{
 		if (strcmp(dp->d_name, "nodes.dmp") == 0)
 		{
-			nodes_found = true;
-			buffer_size = 10000;
+			file_found = true;
 
 			// Initializing the taxa structure
 			tax->taxa = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * buffer_size);
@@ -1264,57 +1256,34 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError allocating the memory for a taxonomy structure");
-				free(tax);
+				free(*parent_taxids_p);
+				free(*rank_names_p);
 				closedir(tax_dir);
-				return NULL;
-			}
-
-			// Initialize rank names and parent taxids arrays
-			parent_taxids = malloc(buffer_size * sizeof(int));
-			if (file_name == NULL)
-			{
-				obi_set_errno(OBI_MALLOC_ERROR);
-				obidebug(1, "\nError allocating the memory for a file name");
-				obi_close_taxonomy(tax);
-				closedir(tax_dir);
-				return NULL;
-			}
-
-			rank_names = malloc(buffer_size * sizeof(char*));
-			if (file_name == NULL)
-			{
-				obi_set_errno(OBI_MALLOC_ERROR);
-				obidebug(1, "\nError allocating the memory for a file name");
-				obi_close_taxonomy(tax);
-				free(parent_taxids);
-				closedir(tax_dir);
-				return NULL;
+				return -1;
 			}
 
 			// Allocating the memory for the file name
-			file_name =	(char*) malloc((strlen(taxdump) + 10)*sizeof(char));
+			file_name =	(char*) malloc((strlen(taxdump) + 11)*sizeof(char));
 			if (file_name == NULL)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError allocating the memory for a file name");
-				obi_close_taxonomy(tax);
-				free(parent_taxids);
-				free(rank_names);
+				free(*parent_taxids_p);
+				free(*rank_names_p);
 				closedir(tax_dir);
-				return NULL;
+				return -1;
 			}
 
 			// Build the file path
 			if (sprintf(file_name, "%s/nodes.dmp", taxdump) < 0)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
-				obidebug(1, "\nError building a taxonomy file name");
-				obi_close_taxonomy(tax);
+				obidebug(1, "\nError building a taxonomy file name for 'nodes.dmp'");
+				free(*parent_taxids_p);
+				free(*rank_names_p);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
 				free(file_name);
-				return NULL;
+				return -1;
 			}
 
 			file = fopen(file_name, "r");
@@ -1322,12 +1291,11 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nProblem opening a taxonomy file");
-				obi_close_taxonomy(tax);
+				free(*parent_taxids_p);
+				free(*rank_names_p);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
 				free(file_name);
-				return NULL;
+				return -1;
 			}
 
 			free(file_name);
@@ -1346,38 +1314,35 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 					{
 						obi_set_errno(OBI_MALLOC_ERROR);
 						obidebug(1, "\nError reallocating memory for a taxonomy structure");
-						obi_close_taxonomy(tax);
+						free(*parent_taxids_p);
+						free(*rank_names_p);
 						fclose(file);
 						closedir(tax_dir);
-						free(parent_taxids);
-						free(rank_names);
-						return NULL;
+						return -1;
 					}
 
-					parent_taxids = (int*) realloc(parent_taxids, sizeof(int) * buffer_size);
-					if (parent_taxids == NULL)
+					*parent_taxids_p = (int*) realloc(*parent_taxids_p, sizeof(int) * buffer_size);
+					if (*parent_taxids_p == NULL)
 					{
 						obi_set_errno(OBI_MALLOC_ERROR);
-						obidebug(1, "\nError reallocating memory for a taxonomy structure");
-						obi_close_taxonomy(tax);
+						obidebug(1, "\nError reallocating memory for the parent taxids array");
+						free(*parent_taxids_p);
+						free(*rank_names_p);
 						fclose(file);
 						closedir(tax_dir);
-						free(parent_taxids);
-						free(rank_names);
-						return NULL;
+						return -1;
 					}
 
-					rank_names = (char**) realloc(rank_names, sizeof(char*) * buffer_size);
-					if (rank_names == NULL)
+					*rank_names_p = (char**) realloc(*rank_names_p, sizeof(char*) * buffer_size);
+					if (*rank_names_p == NULL)
 					{
 						obi_set_errno(OBI_MALLOC_ERROR);
-						obidebug(1, "\nError reallocating memory for a taxonomy structure");
-						obi_close_taxonomy(tax);
+						obidebug(1, "\nError reallocating memory for the rank names array");
+						free(*parent_taxids_p);
+						free(*rank_names_p);
 						fclose(file);
 						closedir(tax_dir);
-						free(parent_taxids);
-						free(rank_names);
-						return NULL;
+						return -1;
 					}
 				}
 
@@ -1386,12 +1351,11 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				{
 					obi_set_errno(OBI_TAXONOMY_ERROR);
 					obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
-					obi_close_taxonomy(tax);
+					free(*parent_taxids_p);
+					free(*rank_names_p);
 					fclose(file);
 					closedir(tax_dir);
-					free(parent_taxids);
-					free(rank_names);
-					return NULL;
+					return -1;
 				}
 
 				(tax->taxa)->taxon[n].idx = n;
@@ -1423,22 +1387,21 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 					elt[strlen(elt)-1] = '\0';
 
 					if (i == 1)
-						parent_taxids[n] = atoi(elt);
+						(*parent_taxids_p)[n] = atoi(elt);
 					else if (i == 2)
 					{
-						rank_names[n] = (char*) malloc((strlen(elt)+1) * sizeof(char));
-						if (rank_names[n] == NULL)
+						(*rank_names_p)[n] = (char*) malloc((strlen(elt)+1) * sizeof(char));
+						if ((*rank_names_p)[n] == NULL)
 						{
 							obi_set_errno(OBI_MALLOC_ERROR);
 							obidebug(1, "\nError allocating memory for taxon rank name");
-							obi_close_taxonomy(tax);
+							free(*parent_taxids_p);
+							free(*rank_names_p);
 							fclose(file);
 							closedir(tax_dir);
-							free(parent_taxids);
-							free(rank_names);
-							return NULL;
+							return -1;
 						}
-						strcpy(rank_names[n], elt);
+						strcpy((*rank_names_p)[n], elt);
 					}
 					i++;
 				}
@@ -1450,12 +1413,11 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nError: file reading was stopped before end of file");
-				obi_close_taxonomy(tax);
+				free(*parent_taxids_p);
+				free(*rank_names_p);
 				fclose(file);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
-				return NULL;
+				return -1;
 			}
 
 			// Store count
@@ -1469,30 +1431,79 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError reallocating memory for taxonomy structure");
-				obi_close_taxonomy(tax);
+				free(*parent_taxids_p);
+				free(*rank_names_p);
 				fclose(file);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
-				return NULL;
+				return -1;
 			}
 
-			fclose(file);
+			if (fclose(file) < 0)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError closing a taxdump file");
+				free(*parent_taxids_p);
+				free(*rank_names_p);
+				closedir(tax_dir);
+				return -1;
+			}
 		}
 	}
-	closedir(tax_dir);
+	if (closedir(tax_dir) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError closing a taxdump directory");
+		free(*parent_taxids_p);
+		free(*rank_names_p);
+		// closedir() has already failed above: the stream must not be closed a second time
+		return -1;
+	}
+
+	if ( ! file_found)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError: could not find 'nodes.dmp' file in taxdump directory");
+		free(*parent_taxids_p);
+		free(*rank_names_p);
+		return -1;
+	}
+
+	return 0;
+}
 
 
-	// Go through directory again for next file		// TODO make separate functions?
+int read_delnodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t** delnodes_p, int32_t* delnodes_count)
+{
+	struct dirent* 	dp;
+	DIR*           	tax_dir;
+	FILE*          	file;
+	char*			file_name;
+	bool           	file_found=false;
+	char 			line[2048];			// TODO large enough?
+	char*			elt;
+	int				buffer_size;
+	int			    n;
+	int 		    old_taxid;
+
+	buffer_size = 10000;
+
+	// Initializing the list of deleted nodes
+	*delnodes_p = (int32_t*) malloc(sizeof(int32_t) * buffer_size);
+	if (*delnodes_p == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating the memory for the deleted taxids array");
+		return -1;
+	}
+
 	tax_dir = opendir(taxdump);
 	if (tax_dir == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
 		obidebug(1, "\nProblem opening a taxdump directory");
-		obi_close_taxonomy(tax);
-		free(parent_taxids);
-		free(rank_names);
-		return NULL;
+		// opendir() failed, tax_dir is NULL: there is no directory stream to close here
+		free(*delnodes_p);
+		return -1;
 	}
 
 	// Go through taxonomy files
@@ -1500,34 +1511,17 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	{
 		if (strcmp(dp->d_name, "delnodes.dmp") == 0)
 		{
-			delnodes_found = true;
-			buffer_size = 10000;
-
-			// Initializing the list of deleted nodes
-			delnodes = (int32_t*) malloc(sizeof(int32_t) * buffer_size);
-			if (delnodes == NULL)
-			{
-				obi_set_errno(OBI_MALLOC_ERROR);
-				obidebug(1, "\nError allocating the memory for a taxonomy structure");
-				obi_close_taxonomy(tax);
-				free(parent_taxids);
-				free(rank_names);
-				closedir(tax_dir);
-				return NULL;
-			}
+			file_found = true;
 
 			// Allocating the memory for the file name
-			file_name =	(char*) malloc((strlen(taxdump) + 12)*sizeof(char));
+			file_name =	(char*) malloc((strlen(taxdump) + 14)*sizeof(char));
 			if (file_name == NULL)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError allocating the memory for a file name");
-				obi_close_taxonomy(tax);
-				free(parent_taxids);
-				free(rank_names);
-				free(delnodes);
 				closedir(tax_dir);
-				return NULL;
+				free(*delnodes_p);
+				return -1;
 			}
 
 			// Build the file path
@@ -1535,13 +1529,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nError building a taxonomy file name");
-				obi_close_taxonomy(tax);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
+				free(*delnodes_p);
 				free(file_name);
-				free(delnodes);
-				return NULL;
+				return -1;
 			}
 
 			file = fopen(file_name, "r");
@@ -1549,13 +1540,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nProblem opening a taxonomy file");
-				obi_close_taxonomy(tax);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
 				free(file_name);
-				free(delnodes);
-				return NULL;
+				free(*delnodes_p);
+				return -1;
 			}
 
 			free(file_name);
@@ -1568,13 +1556,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				{
 					obi_set_errno(OBI_TAXONOMY_ERROR);
 					obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
-					obi_close_taxonomy(tax);
 					fclose(file);
 					closedir(tax_dir);
-					free(parent_taxids);
-					free(rank_names);
-					free(delnodes);
-					return NULL;
+					free(*delnodes_p);
+					return -1;
 				}
 
 				// Get first and only element of the line (the deprecated taxid)
@@ -1589,21 +1574,18 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				if (n == buffer_size)
 				{
 					buffer_size = buffer_size * 2;
-					delnodes = (int32_t*) realloc(tax->merged_idx, sizeof(int32_t) * buffer_size);
-					if (delnodes == NULL)
+					(*delnodes_p) = (int32_t*) realloc(*delnodes_p, sizeof(int32_t) * buffer_size);	// grow the deleted-taxids buffer itself (not tax->merged_idx) so the taxids already read are kept
+					if ((*delnodes_p) == NULL)
 					{
 						obi_set_errno(OBI_MALLOC_ERROR);
 						obidebug(1, "\nError reallocating memory for a taxonomy structure");
-						obi_close_taxonomy(tax);
 						fclose(file);
 						closedir(tax_dir);
-						free(parent_taxids);
-						free(rank_names);
-						return NULL;
+						return -1;
 					}
 				}
 
-				delnodes[n] = old_taxid;
+				(*delnodes_p)[n] = old_taxid;
 				n++;
 			}
 
@@ -1612,35 +1594,67 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nError: file reading was stopped before end of file");
-				obi_close_taxonomy(tax);
 				fclose(file);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
-				free(delnodes);
-				return NULL;
+				free(*delnodes_p);
+				return -1;
 			}
 
 			// Store count
-			delnodes_count = n;
+			*delnodes_count = n;
 
-			fclose(file);
+			if (fclose(file) < 0)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError closing a taxdump file");
+				free(*delnodes_p);
+				closedir(tax_dir);
+				return -1;
+			}
 		}
 	}
-	closedir(tax_dir);
+	if (closedir(tax_dir) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError closing a taxdump directory");
+		free(*delnodes_p);
+		return -1;
+	}
+
+	if ( ! file_found)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError: could not find 'delnodes.dmp' file in taxdump directory");
+		free(*delnodes_p);
+		return -1;
+	}
+
+	return 0;
+}
 
 
-	// Go through directory again for next file		// TODO make separate functions?
+int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnodes, int32_t delnodes_count)
+{
+	int				n, nD, nT;
+	int				taxid, old_taxid;
+	ecotx_t* 		t;
+	struct dirent* 	dp;
+	DIR*           	tax_dir;
+	FILE*          	file;
+	char*			file_name;
+	bool            file_found=false;
+	char 			line[2048];			// TODO large enough?
+	char*			elt;
+	int				buffer_size;
+
+	buffer_size = 10000;
+
 	tax_dir = opendir(taxdump);
 	if (tax_dir == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
 		obidebug(1, "\nProblem opening a taxdump directory");
-		obi_close_taxonomy(tax);
-		free(parent_taxids);
-		free(rank_names);
-		free(delnodes);
-		return NULL;
+		return -1;
 	}
 
 	// Go through taxonomy files
@@ -1648,8 +1662,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	{
 		if (strcmp(dp->d_name, "merged.dmp") == 0)
 		{
-			merged_found = true;
-			buffer_size = 10000;
+			file_found = true;
 
 			// Initializing the merged structure
 			tax->merged_idx = (ecomergedidx_t*) malloc(sizeof(ecomergedidx_t) + sizeof(ecomerged_t) * buffer_size);
@@ -1657,12 +1670,8 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError allocating the memory for a taxonomy structure");
-				obi_close_taxonomy(tax);
-				free(parent_taxids);
-				free(rank_names);
-				free(delnodes);
 				closedir(tax_dir);
-				return NULL;
+				return -1;
 			}
 
 			// Allocating the memory for the file name
@@ -1671,12 +1680,8 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError allocating the memory for a file name");
-				obi_close_taxonomy(tax);
-				free(parent_taxids);
-				free(rank_names);
-				free(delnodes);
 				closedir(tax_dir);
-				return NULL;
+				return -1;
 			}
 
 			// Build the file path
@@ -1684,13 +1689,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nError building a taxonomy file name");
-				obi_close_taxonomy(tax);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
 				free(file_name);
-				free(delnodes);
-				return NULL;
+				return -1;
 			}
 
 			file = fopen(file_name, "r");
@@ -1698,13 +1699,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nProblem opening a taxonomy file");
-				obi_close_taxonomy(tax);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
 				free(file_name);
-				free(delnodes);
-				return NULL;
+				return -1;
 			}
 
 			free(file_name);
@@ -1719,13 +1716,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				{
 					obi_set_errno(OBI_TAXONOMY_ERROR);
 					obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
-					obi_close_taxonomy(tax);
-					fclose(file);
 					closedir(tax_dir);
-					free(parent_taxids);
-					free(rank_names);
-					free(delnodes);
-					return NULL;
+					fclose(file);
+					return -1;
 				}
 
 				// Parse the 2 elements separated by '|'
@@ -1762,13 +1755,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 							{
 								obi_set_errno(OBI_MALLOC_ERROR);
 								obidebug(1, "\nError reallocating memory for a taxonomy structure");
-								obi_close_taxonomy(tax);
-								fclose(file);
 								closedir(tax_dir);
-								free(parent_taxids);
-								free(rank_names);
-								free(delnodes);
-								return NULL;
+								fclose(file);
+								return -1;
 							}
 						}
 
@@ -1788,13 +1777,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 							{
 								obi_set_errno(OBI_MALLOC_ERROR);
 								obidebug(1, "\nError reallocating memory for a taxonomy structure");
-								obi_close_taxonomy(tax);
-								fclose(file);
 								closedir(tax_dir);
-								free(parent_taxids);
-								free(rank_names);
-								free(delnodes);
-								return NULL;
+								fclose(file);
+								return -1;
 							}
 						}
 
@@ -1815,13 +1800,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 					{
 						obi_set_errno(OBI_MALLOC_ERROR);
 						obidebug(1, "\nError reallocating memory for a taxonomy structure");
-						obi_close_taxonomy(tax);
-						fclose(file);
 						closedir(tax_dir);
-						free(parent_taxids);
-						free(rank_names);
-						free(delnodes);
-						return NULL;
+						fclose(file);
+						return -1;
 					}
 				}
 
@@ -1839,12 +1820,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nError: file reading was stopped before end of file");
-				obi_close_taxonomy(tax);
-				fclose(file);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
-				return NULL;
+				fclose(file);
+				return -1;
 			}
 
 			// Store count
@@ -1856,34 +1834,63 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError reallocating memory for a a taxonomy structure");
-				obi_close_taxonomy(tax);
-				fclose(file);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
-				return NULL;
+				fclose(file);
+				return -1;
 			}
 
-			fclose(file);
+			if (fclose(file) < 0)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError closing a taxdump file");
+				closedir(tax_dir);
+				return -1;
+			}
 		}
 	}
+	if (closedir(tax_dir) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError closing a taxdump directory");
+		// closedir() has already failed above: the stream must not be closed a second time
+		return -1;
+	}
+
+	if ( ! file_found)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError: could not find 'merged.dmp' file in taxdump directory");
+		return -1;
+	}
 
 	// Free delnodes array, not needed anymore
 	free(delnodes);
 
-	closedir(tax_dir);
+	return 0;
+}
 
 
-	// Go through directory again for next file
+int read_names_dmp(const char* taxdump, OBIDMS_taxonomy_p tax)
+{
+	int				i, j, n;
+	int				taxid;
+	struct dirent* 	dp;
+	DIR*           	tax_dir;
+	FILE*          	file;
+	char*			file_name;
+	bool            file_found=false;
+	char 			line[2048];			// TODO large enough?
+	char*			elt;
+	int				buffer_size;
+
+	buffer_size = 10000;
+
 	tax_dir = opendir(taxdump);
 	if (tax_dir == NULL)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
 		obidebug(1, "\nProblem opening a taxdump directory");
-		obi_close_taxonomy(tax);
-		free(parent_taxids);
-		free(rank_names);
-		return NULL;
+		return -1;
 	}
 
 	// Go through taxonomy files
@@ -1891,8 +1898,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	{
 		if (strcmp(dp->d_name, "names.dmp") == 0)
 		{
-			names_found = true;
-			buffer_size = 10000;
+			file_found = true;
 
 			// Initializing the names structure
 			tax->names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * buffer_size);
@@ -1900,11 +1906,8 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError allocating the memory for a taxonomy structure");
-				obi_close_taxonomy(tax);
-				free(parent_taxids);
-				free(rank_names);
 				closedir(tax_dir);
-				return NULL;
+				return -1;
 			}
 
 			// Allocating the memory for the file name
@@ -1913,11 +1916,8 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError allocating the memory for a file name");
-				obi_close_taxonomy(tax);
-				free(parent_taxids);
-				free(rank_names);
 				closedir(tax_dir);
-				return NULL;
+				return -1;
 			}
 
 			// Build the file path
@@ -1925,12 +1925,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nError building a taxonomy file name");
-				obi_close_taxonomy(tax);
-				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
 				free(file_name);
-				return NULL;
+				closedir(tax_dir);
+				return -1;
 			}
 
 			file = fopen(file_name, "r");
@@ -1938,12 +1935,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nProblem opening a taxonomy file");
-				obi_close_taxonomy(tax);
-				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
 				free(file_name);
-				return NULL;
+				closedir(tax_dir);
+				return -1;
 			}
 
 			free(file_name);
@@ -1961,12 +1955,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 					{
 						obi_set_errno(OBI_MALLOC_ERROR);
 						obidebug(1, "\nError reallocating memory for a taxonomy structure");
-						obi_close_taxonomy(tax);
 						fclose(file);
 						closedir(tax_dir);
-						free(parent_taxids);
-						free(rank_names);
-						return NULL;
+						return -1;
 					}
 				}
 
@@ -1975,12 +1966,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				{
 					obi_set_errno(OBI_TAXONOMY_ERROR);
 					obidebug(1, "\nError: line buffer size not large enough for line in taxonomy file");
-					obi_close_taxonomy(tax);
 					fclose(file);
 					closedir(tax_dir);
-					free(parent_taxids);
-					free(rank_names);
-					return NULL;
+					return -1;
 				}
 
 				// Parse 4 first elements separated by '|'
@@ -2000,12 +1988,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 				{
 					obi_set_errno(OBI_TAXONOMY_ERROR);
 					obidebug(1, "\nError: could not find taxon associated to name when reading taxdump");
-					obi_close_taxonomy(tax);
 					fclose(file);
 					closedir(tax_dir);
-					free(parent_taxids);
-					free(rank_names);
-					return NULL;
+					return -1;
 				}
 				j = i;	// Because there are several names by taxon but they are in the same order
 				(tax->names)->names[n].taxon = ((tax->taxa)->taxon)+i;
@@ -2029,9 +2014,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 							obi_close_taxonomy(tax);
 							fclose(file);
 							closedir(tax_dir);
-							free(parent_taxids);
-							free(rank_names);
-							return NULL;
+							return -1;
 						}
 						strcpy((tax->names)->names[n].name, elt);
 					}
@@ -2042,12 +2025,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 						{
 							obi_set_errno(OBI_MALLOC_ERROR);
 							obidebug(1, "\nError allocating memory for a taxon class name");
-							obi_close_taxonomy(tax);
 							fclose(file);
 							closedir(tax_dir);
-							free(parent_taxids);
-							free(rank_names);
-							return NULL;
+							return -1;
 						}
 						strcpy((tax->names)->names[n].class_name, elt);
 						if (strcmp(elt, "scientific name") == 0)
@@ -2067,12 +2047,9 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_TAXONOMY_ERROR);
 				obidebug(1, "\nError: file reading was stopped before end of file");
-				obi_close_taxonomy(tax);
 				fclose(file);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
-				return NULL;
+				return -1;
 			}
 
 			// Store count
@@ -2084,35 +2061,109 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 			{
 				obi_set_errno(OBI_MALLOC_ERROR);
 				obidebug(1, "\nError reallocating memory for a a taxonomy structure");
-				obi_close_taxonomy(tax);
 				fclose(file);
 				closedir(tax_dir);
-				free(parent_taxids);
-				free(rank_names);
-				return NULL;
+				return -1;
 			}
 
-			fclose(file);
+			if (fclose(file) < 0)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError closing a taxdump file");
+				closedir(tax_dir);
+				return -1;
+			}
 		}
 	}
-	closedir(tax_dir);
-
-	if (!nodes_found)
+	if (closedir(tax_dir) < 0)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nProblem reading taxdump: nodes.dmp file not found");
-		obi_close_taxonomy(tax);
-		free(parent_taxids);
-		free(rank_names);
+		obidebug(1, "\nError closing a taxdump directory");
+		// closedir() has already failed above: the stream must not be closed a second time
+		return -1;
+	}
+
+	if ( ! file_found)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError: could not find 'names.dmp' file in taxdump directory");	// this function scans for names.dmp, not merged.dmp
+		return -1;
+	}
+
+	return 0;
+}
+
+
+OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
+{
+	OBIDMS_taxonomy_p tax;
+	char**		      rank_names=NULL;
+	int*		      parent_taxids=NULL;
+	int32_t*          delnodes=NULL;
+	int32_t           delnodes_count;
+	bool			  already_in;
+	ecotx_t* 		  t;
+	int				  buffer_size;
+	int			      i, j;
+
+	// Initialize taxonomy structure
+	tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
+	if (tax == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating the memory for a taxonomy structure");
 		return NULL;
 	}
-	if (!names_found)
+	tax->ranks      = NULL;
+	tax->taxa       = NULL;
+	tax->names      = NULL;
+	tax->merged_idx = NULL;
+
+	tax->dms = NULL;
+	(tax->tax_name)[0] = '\0';
+
+	// TODO check if taxdump path is for a gz file to unzip or a directory
+
+	// READ NODES.DMP
+	if (read_nodes_dmp(taxdump, tax, &rank_names, &parent_taxids) < 0)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nProblem reading taxdump: names.dmp file not found");
+		obidebug(1, "\nProblem reading 'nodes.dmp'");
+		obi_close_taxonomy(tax);
+		return NULL;
+	}
+
+	// READ DELNODES.DMP
+	if (read_delnodes_dmp(taxdump, tax, &delnodes, &delnodes_count) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nProblem reading 'delnodes.dmp'");
 		obi_close_taxonomy(tax);
-		free(parent_taxids);
 		free(rank_names);
+		free(parent_taxids);
+		return NULL;
+	}
+
+	// READ MERGED.DMP
+	if (read_merged_dmp(taxdump, tax, delnodes, delnodes_count) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nProblem reading 'merged.dmp'");
+		obi_close_taxonomy(tax);
+		free(delnodes);
+		free(rank_names);
+		free(parent_taxids);
+		return NULL;
+	}
+
+	// READ NAMES.DMP
+	if (read_names_dmp(taxdump, tax) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nProblem reading 'names.dmp'");
+		obi_close_taxonomy(tax);
+		free(rank_names);
+		free(parent_taxids);
 		return NULL;
 	}
 
@@ -2246,7 +2297,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 
 	(tax->taxa)->buffer_size = (tax->taxa)->count;
 
-	// Compute longest branches
+	// Compute longest branches (used to compute distances between taxa faster)
 	for (i=0; i < (tax->taxa)->count; i++)
 	{
 		t = (((tax->taxa))->taxon)+i;
@@ -2281,8 +2332,8 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const
 {
 	int32_t    taxid;
 	ecotx_t*   taxon;
-	econame_t* name_struct;
-	int       i;
+	int        i;
+//	econame_t* name_struct;
 
 	// Enlarge the structure memory for a new taxon
 	tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (((tax->taxa)->count) + 1));
@@ -2323,7 +2374,7 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const
 			break;
 		}
 	}
-	if (taxon->rank == -1)	// TODO Discuss possibility of creating rank if doesn't exist
+	if (taxon->rank == -1)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
 		obidebug(1, "\nError: taxon rank not found when adding a new taxon");
@@ -2336,7 +2387,7 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const
 		obidebug(1, "\nError: taxon parent not found when adding a new taxon");
 		return -1;
 	}
-	taxon->farest = 0;	// TODO not sure
+	taxon->farest = 0;
 
 	// Update taxonomy counts etc
 	(tax->taxa)->max_taxid = taxid;
@@ -2344,42 +2395,42 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const
 	((tax->taxa)->local_count)++;
 	(tax->taxa)->buffer_size = (tax->taxa)->count;
 
-	// Add new name in names structure		// TODO discuss because in OBITools1 the new names were not written in .ndx
-	// Allocate memory for new name
-	tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1));
-	if (tax->names == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
-		return -1;
-	}
-
-	// Add new name
-	name_struct = (tax->names)->names + ((tax->names)->count);
-	name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
-	if (name_struct->name == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
-		return -1;
-	}
-	strcpy(name_struct->name, name);
-	name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char));
-	if (name_struct->class_name == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon");
-		return -1;
-	}
-	strcpy(name_struct->class_name, "scientific name");
-	name_struct->is_scientific_name = true;
-	name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1;
-
-	// Sort names in alphabetical order
-	qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names);
-
-	// Update name count
-	((tax->names)->count)++;
+//	// Add new name in names structure     // Commented because the new name was not added in the .ndx file in the OBITools1
+//	// Allocate memory for new name
+//	tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1));
+//	if (tax->names == NULL)
+//	{
+//		obi_set_errno(OBI_MALLOC_ERROR);
+//		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
+//		return -1;
+//	}
+//
+//	// Add new name
+//	name_struct = (tax->names)->names + ((tax->names)->count);
+//	name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
+//	if (name_struct->name == NULL)
+//	{
+//		obi_set_errno(OBI_MALLOC_ERROR);
+//		obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
+//		return -1;
+//	}
+//	strcpy(name_struct->name, name);
+//	name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char));
+//	if (name_struct->class_name == NULL)
+//	{
+//		obi_set_errno(OBI_MALLOC_ERROR);
+//		obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon");
+//		return -1;
+//	}
+//	strcpy(name_struct->class_name, "scientific name");
+//	name_struct->is_scientific_name = true;
+//	name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1;
+//
+//	// Sort names in alphabetical order
+//	qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names);
+//
+//	// Update name count
+//	((tax->names)->count)++;
 
 	return taxid;
 }
@@ -2445,7 +2496,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 	if (tax->ranks == NULL)
 	{
  		obi_set_errno(OBI_TAXONOMY_ERROR);
- 		obidebug(1, "\nError building ranks file name");
+ 		obidebug(1, "\nError reading taxonomy ranks file (check taxonomy name spelling)");
 		free(taxonomy_path);
 		free(ranks_file_name);
 		free(tax);

From c065c1914ac6cf3d740e8f2d285a019bab37b102 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Mon, 16 Jan 2017 17:28:20 +0100
Subject: [PATCH 16/22] Taxonomy: adding, writing and reading preferred names,
 changed some function names, and fixed a bug with taxa indices not being
 properly initialized

---
 python/obitools3/obidms/_obitaxo.pxd         |   3 +-
 python/obitools3/obidms/_obitaxo.pyx         |  40 +-
 python/obitools3/obidms/capi/obitaxonomy.pxd |  10 +-
 src/obidms_taxonomy.c                        | 435 +++++++++++++++++--
 src/obidms_taxonomy.h                        |  13 +-
 5 files changed, 444 insertions(+), 57 deletions(-)

diff --git a/python/obitools3/obidms/_obitaxo.pxd b/python/obitools3/obidms/_obitaxo.pxd
index 51c6c0f..671d5e1 100644
--- a/python/obitools3/obidms/_obitaxo.pxd
+++ b/python/obitools3/obidms/_obitaxo.pxd
@@ -17,4 +17,5 @@ cdef class OBI_Taxonomy :
  
 cdef class OBI_Taxon :
 
-    cdef ecotx_t* _pointer
+    cdef ecotx_t*      _pointer
+    cdef OBI_Taxonomy  _tax
diff --git a/python/obitools3/obidms/_obitaxo.pyx b/python/obitools3/obidms/_obitaxo.pyx
index b9d7e98..3aae515 100644
--- a/python/obitools3/obidms/_obitaxo.pyx
+++ b/python/obitools3/obidms/_obitaxo.pyx
@@ -7,10 +7,10 @@ from .capi.obitaxonomy cimport obi_read_taxonomy, \
                                obi_write_taxonomy, \
                                obi_close_taxonomy, \
                                obi_taxo_get_taxon_with_taxid, \
-                               obi_taxonomy_add_local_taxon, \
+                               obi_taxo_add_local_taxon, \
+                               obi_taxo_add_preferred_name_with_taxon, \
                                ecotx_t                            
 
-
 from ._obidms cimport OBIDMS
 
 from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
@@ -42,11 +42,11 @@ cdef class OBI_Taxonomy :
             if taxon_p == NULL :
                 raise Exception("Taxon not found")
             taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
-            return OBI_Taxon(taxon_capsule)
+            return OBI_Taxon(taxon_capsule, self)
         else :
             raise Exception("Not implemented")
-        
-    
+
+
     def __iter__(self):
          
         cdef ecotx_t* taxa
@@ -60,7 +60,7 @@ cdef class OBI_Taxonomy :
         for t in range(self._pointer.taxa.count):
             taxon_p = <ecotx_t*> (taxa+t)
             taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
-            yield OBI_Taxon(taxon_capsule)
+            yield OBI_Taxon(taxon_capsule, self)
 
 
     cpdef write(self, str prefix) :
@@ -70,7 +70,7 @@ cdef class OBI_Taxonomy :
     
     cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
         cdef int taxid
-        taxid = obi_taxonomy_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
+        taxid = obi_taxo_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
         if taxid < 0 :
             raise Exception("Error adding a new taxon to the taxonomy")
         else :
@@ -85,10 +85,11 @@ cdef class OBI_Taxonomy :
 
 cdef class OBI_Taxon :    # TODO dict subclass?
 
-    def __init__(self, object taxon_capsule) :
+    def __init__(self, object taxon_capsule, OBI_Taxonomy tax) :
         self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
         if self._pointer == NULL :
-            raise Exception("Error reading the taxonomy")
+            raise Exception("Error reading a taxon (NULL pointer)")
+        self._tax = tax
 
     # name property getter
     @property
@@ -115,14 +116,25 @@ cdef class OBI_Taxon :    # TODO dict subclass?
     def parent(self):
         cdef object parent_capsule
         parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
-        return OBI_Taxon(parent_capsule)
+        return OBI_Taxon(parent_capsule, self._tax)
+
+    # preferred name property getter and setter
+    @property
+    def preferred_name(self):
+        if self._pointer.preferred_name != NULL :
+            return bytes2str(self._pointer.preferred_name)
+    @preferred_name.setter
+    def preferred_name(self, str new_preferred_name) :  # @DuplicatedSignature
+        if (obi_taxo_add_preferred_name_with_taxon(self._tax._pointer, self._pointer, str2bytes(new_preferred_name)) < 0) :
+            raise Exception("Error adding a new preferred name to a taxon")
 
     def __repr__(self):
         d = {}
-        d['taxid']   = self.taxid
-        d['name']    = self.name
-        d['parent']  = self.parent.taxid
-        d['farest']  = self.farest
+        d['taxid']          = self.taxid
+        d['name']           = self.name
+        d['preferred name'] = self.preferred_name
+        d['parent']         = self.parent.taxid
+        d['farest']         = self.farest
         return str(d)
     
     
diff --git a/python/obitools3/obidms/capi/obitaxonomy.pxd b/python/obitools3/obidms/capi/obitaxonomy.pxd
index 99cd7e4..d90693c 100644
--- a/python/obitools3/obidms/capi/obitaxonomy.pxd
+++ b/python/obitools3/obidms/capi/obitaxonomy.pxd
@@ -13,7 +13,8 @@ cdef extern from "obidms_taxonomy.h" nogil:
         int32_t           farest
         ecotxnode*        parent
         char*             name
-    
+        char*             preferred_name
+
     ctypedef ecotxnode ecotx_t
 
 
@@ -56,4 +57,9 @@ cdef extern from "obidms_taxonomy.h" nogil:
     
     ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
     
-    int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
+    int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
+
+    int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
+
+    int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
+
diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c
index 6295be8..9e081f0 100644
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@@ -246,7 +246,7 @@ FILE* open_ecorecorddb(const char* file_name,
 }
 
 
-ecorankidx_t* read_rankidx(const char* ranks_file_name)
+ecorankidx_t* read_ranks_idx(const char* ranks_file_name)
 {
 	int32_t      			count;
 	FILE*        			ranks_file;
@@ -301,7 +301,7 @@ ecorankidx_t* read_rankidx(const char* ranks_file_name)
 }
 
 
-ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name)
+ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name)
 {
 	int32_t      	  count_taxa;
 	int32_t      	  count_local_taxa;
@@ -341,10 +341,12 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
 	for (i=0; i<count_taxa; i++)
 	{
 		readnext_ecotaxon(f_taxa, &(taxa_index->taxon[i]));
+		taxa_index->taxon[i].idx = i;
 		taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
 		taxa_index->taxon[i].parent->farest = 0;
 		if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
 			taxa_index->max_taxid = taxa_index->taxon[i].taxid;
+		taxa_index->taxon[i].preferred_name = NULL;
 	}
 
 	if (count_local_taxa > 0)
@@ -361,6 +363,7 @@ ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_
 		taxa_index->taxon[i].parent->farest=0;
 		if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
 			taxa_index->max_taxid = taxa_index->taxon[i].taxid;
+		taxa_index->taxon[i].preferred_name = NULL;
 	}
 
 	for (i=0; i < count_taxa; i++)
@@ -431,7 +434,60 @@ econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy
 }
 
 
-econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
+// Reads the next record of a .pdx file in the econame_t structure given, and copies the name read in the preferred_name field of its taxon.
+econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
+{
+	econameformat_t* raw;
+	int32_t          record_length;
+
+	raw = read_ecorecord(f, &record_length);
+	if (raw == NULL)
+		return NULL;
+	name->is_scientific_name = raw->is_scientific_name;
+
+	name->name = malloc((raw->name_length + 1) * sizeof(char));
+	if (name->name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon preferred name");
+		free(raw);
+		return NULL;
+	}
+	strncpy(name->name, raw->names, raw->name_length);
+	name->name[raw->name_length] = 0;
+
+	name->class_name = malloc((raw->class_length+1) * sizeof(char));
+	if (name->class_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon class name");
+		free(name->name);
+		free(raw);
+		return NULL;
+	}
+	strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length);	// the class name follows the name in raw->names
+	name->class_name[raw->class_length] = 0;
+
+	name->taxon = taxonomy->taxa->taxon + raw->taxid;	// the taxid field of the record is used as an index in the taxa array
+
+	// Add the preferred name in the taxon structure, releasing first the copy made for an earlier record of the same taxon (free(NULL) is a no-op) 	// TODO discuss: couldn't they all use the same pointer?
+	free(name->taxon->preferred_name);
+	name->taxon->preferred_name = malloc((raw->name_length + 1) * sizeof(char));
+	if (name->taxon->preferred_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon preferred name");
+		free(name->name);
+		free(name->class_name);
+		free(raw);
+		return NULL;
+	}
+	strcpy(name->taxon->preferred_name, name->name);
+	return name;
+}
+
+
+econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 {
 	int32_t      		count;
 	FILE*				f;
@@ -440,10 +496,7 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 
 	f = open_ecorecorddb(file_name, &count, 0);
 	if (f == NULL)
-	{
- 		obidebug(1, "\nError reading taxonomy name file");
- 		return NULL;
-	}
+		return NULL;
 
 	index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
 	if (index_names == NULL)
@@ -473,9 +526,46 @@ econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 }
 
 
+// Reads a preferred names (.pdx) file in a newly allocated econameidx_t structure. Returns NULL if the file can't be opened or if an error occurs.
+econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
+{
+	int32_t      		count;
+	FILE*				f;
+	econameidx_t*		index_names;
+	int32_t      		i;
+
+	f = open_ecorecorddb(file_name, &count, 0);
+	if (f == NULL)
+		return NULL;
+
+	index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * count);
+	if (index_names == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError reading taxonomy name file");
+		fclose(f);	// don't leak the stream on the error path
+		return NULL;
+	}
+	index_names->count = count;
+
+	for (i=0; i < count; i++)
+	{
+		// Test what the reader returns: the address (index_names->names)+i itself can never be NULL
+		if (readnext_ecopreferredname(f, (index_names->names)+i, taxonomy) == NULL)
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError reading taxonomy name file");
+			free(index_names);
+			fclose(f);
+			return NULL;
+		}
+	}
+	fclose(f);
+	return index_names;
+}
 
 
-ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
+ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 {
 	int32_t      		count;
 	FILE*				f;
@@ -528,7 +618,7 @@ ecomergedidx_t* read_mergedidx(const char *file_name, OBIDMS_taxonomy_p taxonomy
 
 // Functions to write taxonomy structure to binary files
 
-int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
+int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
 {
 	int     i;
 	char* 	file_name;
@@ -631,7 +721,7 @@ int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
 }
 
 
-int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
+int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
 {
 	int     i;
 	char* 	file_name;
@@ -905,7 +995,7 @@ int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* ta
 }
 
 
-int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
+int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
 {
 	int     i;
 	char* 	file_name;
@@ -1053,7 +1143,155 @@ int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name
 }
 
 
-int write_mergedidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
+// Writes the preferred names of a taxonomy in the .pdx binary file of its directory. Returns 0 on success, -1 if an error occurred.
+int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
+{
+	int     i;
+	char* 	file_name;
+	int 	file_descriptor;
+	off_t 	file_size;
+	char*   taxonomy_path;
+	int32_t	name_length;
+	int32_t	class_length;
+	int32_t record_size;
+
+	// Compute file size
+	file_size = sizeof(int32_t);	// To store record count
+	for (i=0; i < (tax->preferred_names)->count; i++)
+	{
+		file_size = file_size + sizeof(int32_t) * 5;						// To store record size, scientific name flag, name length, class length, and taxon index
+		file_size = file_size + strlen(tax->preferred_names->names[i].name);			// To store name
+		file_size = file_size + strlen(tax->preferred_names->names[i].class_name);	// To store class name
+	}
+
+	// Build the taxonomy directory path
+	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
+	if (taxonomy_path == NULL)	// strlen() below must not be given a NULL pointer
+		return -1;
+	file_name =	(char*) malloc((strlen(taxonomy_path) + strlen(taxonomy_name) + 6)*sizeof(char));	// +6: '/', ".pdx" and the terminating null character
+	if (file_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating the memory for a binary taxonomy file name");
+		free(taxonomy_path);
+		return -1;
+	}
+	// Build the file path
+	if (sprintf(file_name, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError building a binary taxonomy file name");
+		free(taxonomy_path);
+		free(file_name);
+		return -1;
+	}
+	free(taxonomy_path);
+
+	// Create file
+	file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);	// O_EXCL: fails if the .pdx file already exists
+	if (file_descriptor < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError creating a binary taxonomy file");
+		free(file_name);
+		return -1;
+	}
+	free(file_name);
+
+	// Truncate the file to the right size
+	if (ftruncate(file_descriptor, file_size) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError truncating a binary taxonomy file");
+		close(file_descriptor);
+		return -1;
+	}
+
+	// Write record count
+	if (write(file_descriptor, &(tax->preferred_names->count), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError writing in a binary taxonomy file");
+		close(file_descriptor);
+		return -1;
+	}
+
+	// Write records
+	for (i=0; i < tax->preferred_names->count; i++)
+	{
+		name_length = strlen(tax->preferred_names->names[i].name);
+		class_length = strlen(tax->preferred_names->names[i].class_name);
+		record_size = 4*sizeof(int32_t) + name_length + class_length;	// the record size field itself is not counted
+		// Write record size
+		if (write(file_descriptor, &record_size, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+		// Write if the name is a scientific name
+		if (write(file_descriptor, &(tax->preferred_names->names[i].is_scientific_name), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+		// Write name length
+		if (write(file_descriptor, &name_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+		// Write class length
+		if (write(file_descriptor, &class_length, sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+		// Write taxid index
+		if (write(file_descriptor, &(tax->preferred_names->names[i].taxon->idx), sizeof(int32_t)) < ((ssize_t) sizeof(int32_t)))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+		// Write name
+		if (write(file_descriptor, tax->preferred_names->names[i].name, name_length) < ((ssize_t) name_length))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+		// Write class
+		if (write(file_descriptor, tax->preferred_names->names[i].class_name, class_length) < ((ssize_t) class_length))
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nError writing in a binary taxonomy file");
+			close(file_descriptor);
+			return -1;
+		}
+	}
+
+	// Close file
+	if (close(file_descriptor) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError closing a pdx taxonomy file");
+		return -1;
+	}
+	return 0;
+}
+
+
+int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
 {
 	int     i;
 	char* 	file_name;
@@ -1182,19 +1420,22 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
 
 	free(taxonomy_path);
 
-    if (write_rankidx(dms, tax, tax_name) < 0)
-         return -1;
-     if (write_taxonomyidx(dms, tax, tax_name) < 0)
-         return -1;
-     if (write_nameidx(dms, tax, tax_name) < 0)
-         return -1;
-     if (write_mergedidx(dms, tax, tax_name) < 0)
-         return -1;
-     // Check if there are local taxa (if so last taxon is local)
-     if ((tax->taxa)->local_count > 0)
-    	 if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
-    		 return -1;
-
+    if (write_ranks_idx(dms, tax, tax_name) < 0)
+        return -1;
+    if (write_taxonomy_idx(dms, tax, tax_name) < 0)
+        return -1;
+    if (write_names_idx(dms, tax, tax_name) < 0)
+    	return -1;
+    if (write_merged_idx(dms, tax, tax_name) < 0)
+    	return -1;
+    // Check if there are local taxa (if so last taxon is local)
+    if ((tax->taxa)->local_count > 0)
+    	if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
+    		return -1;
+    // Write preferred names if there are some
+    if (tax->preferred_names != NULL)
+    	if (write_preferred_names_idx(dms, tax, tax_name) < 0)
+    		return -1;
 	return 0;
 }
 
@@ -2114,10 +2355,11 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 		obidebug(1, "\nError allocating the memory for a taxonomy structure");
 		return NULL;
 	}
-	tax->ranks      = NULL;
-	tax->taxa       = NULL;
-	tax->names      = NULL;
-	tax->merged_idx = NULL;
+	tax->ranks      	 = NULL;
+	tax->taxa       	 = NULL;
+	tax->names      	 = NULL;
+	tax->preferred_names = NULL;
+	tax->merged_idx 	 = NULL;
 
 	tax->dms = NULL;
 	(tax->tax_name)[0] = '\0';
@@ -2295,6 +2537,10 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 		(((tax->taxa)->taxon)[i].parent)->farest = 0;
 	}
 
+	// Initialize preferred names to NULL
+	for (i=0; i < (tax->taxa)->count; i++)
+		((tax->taxa)->taxon)[i].preferred_name = NULL;
+
 	(tax->taxa)->buffer_size = (tax->taxa)->count;
 
 	// Compute longest branches (used to compute distances between taxa faster)
@@ -2328,7 +2574,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 }
 
 
-int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
+int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
 {
 	int32_t    taxid;
 	ecotx_t*   taxon;
@@ -2436,6 +2682,81 @@ int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const
 }
 
 
+// Adds a preferred name to the taxon associated with a taxid. Returns 0 on success, -1 on error.
+int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
+{
+	ecotx_t* taxon = obi_taxo_get_taxon_with_taxid(tax, taxid);
+	if (taxon == NULL)	// failed lookup (presumably an unknown taxid): stop here rather than handing a NULL taxon over
+		return -1;
+	return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name);
+}
+
+
+// Sets the preferred name of a taxon and adds it to the preferred names of the taxonomy (sorted with cmp_names). Returns 0 on success, -1 on error.
+int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
+{
+	econame_t*    name_struct;
+	econameidx_t* new_index;
+	int32_t       count;
+
+	if ((tax == NULL) || (taxon == NULL) || (preferred_name == NULL))
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError adding a preferred name: NULL taxonomy, taxon or name");
+		return -1;
+	}
+
+	// Replace the previous preferred name if there is one (free(NULL) is a no-op)
+	free(taxon->preferred_name);
+	taxon->preferred_name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char));
+	if (taxon->preferred_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a new preferred name for a taxon");
+		return -1;
+	}
+	strcpy(taxon->preferred_name, preferred_name);
+
+	// Grow the preferred names structure by one slot (realloc(NULL, ...) acts as malloc), through a temporary so that nothing is lost or dereferenced on failure
+	count = (tax->preferred_names == NULL) ? 0 : (tax->preferred_names)->count;
+	new_index = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * (count + 1));
+	if (new_index == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name");
+		return -1;
+	}
+	new_index->count = count;
+	tax->preferred_names = new_index;
+
+	name_struct = new_index->names + count;	// the new slot, right after the names already stored
+	name_struct->name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char));
+	if (name_struct->name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a new taxon preferred name");
+		return -1;
+	}
+	strcpy(name_struct->name, preferred_name);
+	name_struct->class_name = (char*) malloc((strlen("preferred name") + 1) * sizeof(char));
+	if (name_struct->class_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name");
+		free(name_struct->name);
+		return -1;
+	}
+	strcpy(name_struct->class_name, "preferred name");
+	name_struct->is_scientific_name = false;
+	name_struct->taxon = taxon;
+
+	// Count the new name before sorting, otherwise qsort leaves it out of the sorted range
+	((tax->preferred_names)->count)++;
+	qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names);
+	return 0;
+}
+
+
 /////// PUBLIC /////////
 
 
@@ -2448,6 +2769,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 	char*              merged_idx_file_name;
 	char*			   local_taxa_file_name;
 	char*			   alter_names_file_name;
+	char*			   pref_names_file_name;
 	int                buffer_size;
 
 	tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
@@ -2458,10 +2780,11 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
  		return NULL;
 	}
 
-	tax->ranks      = NULL;
-	tax->taxa       = NULL;
-	tax->names      = NULL;
-	tax->merged_idx = NULL;
+	tax->ranks           = NULL;
+	tax->taxa            = NULL;
+	tax->names           = NULL;
+	tax->preferred_names = NULL;
+	tax->merged_idx      = NULL;
 
 	tax->dms = dms;
 
@@ -2492,7 +2815,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 		free(tax);
 		return NULL;
 	}
-	tax->ranks = read_rankidx(ranks_file_name);
+	tax->ranks = read_ranks_idx(ranks_file_name);
 	if (tax->ranks == NULL)
 	{
  		obi_set_errno(OBI_TAXONOMY_ERROR);
@@ -2543,7 +2866,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 		obi_close_taxonomy(tax);
 		return NULL;
 	}
-	tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name);
+	tax->taxa = read_taxonomy_idx(taxa_file_name, local_taxa_file_name);
 	if (tax->taxa == NULL)
 	{
 		free(taxonomy_path);
@@ -2574,7 +2897,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 		obi_close_taxonomy(tax);
 		return NULL;
 	}
-    tax->merged_idx = read_mergedidx(merged_idx_file_name, tax);
+    tax->merged_idx = read_merged_idx(merged_idx_file_name, tax);
 	if (tax->merged_idx == NULL)
 	{
 		free(taxonomy_path);
@@ -2584,6 +2907,38 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 	}
     free(merged_idx_file_name);
 
+	// Read preferred names
+	pref_names_file_name = (char*) malloc(buffer_size*sizeof(char));
+	if (pref_names_file_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for alternative names file name");
+		free(taxonomy_path);
+		obi_close_taxonomy(tax);
+		return NULL;
+	}
+	if (snprintf(pref_names_file_name, buffer_size, "%s/%s.pdx", taxonomy_path, taxonomy_name) < 0)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError building alternative names file name");
+		free(taxonomy_path);
+		free(pref_names_file_name);
+		obi_close_taxonomy(tax);
+		return NULL;
+	}
+	tax->preferred_names = read_preferred_names_idx(pref_names_file_name, tax);
+	if (obi_errno)
+	{
+		free(taxonomy_path);
+		free(pref_names_file_name);
+		obi_close_taxonomy(tax);
+		return NULL;
+	}
+	free(pref_names_file_name);
+
+	if (tax->preferred_names != NULL)
+		fprintf(stderr, "\nPreferred names read");
+
 	// Read alternative names
 	if (read_alternative_names)
 	{
@@ -2605,7 +2960,7 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 			obi_close_taxonomy(tax);
 			return NULL;
 		}
-        tax->names = read_nameidx(alter_names_file_name, tax);
+        tax->names = read_names_idx(alter_names_file_name, tax);
     	if (tax->names == NULL)
     	{
 			free(taxonomy_path);
@@ -2637,6 +2992,10 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
 		if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
 			return -1;
 	}
+    // Write preferred names if there are some
+    if (taxonomy->preferred_names != NULL)
+    	if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
+    		return -1;
 
 	if (taxonomy)
 	{
diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h
index 33d8aba..fa2f511 100644
--- a/src/obidms_taxonomy.h
+++ b/src/obidms_taxonomy.h
@@ -36,7 +36,8 @@ typedef struct ecotxnode {
 	int32_t  		  farest;
 	int32_t			  idx;
 	struct ecotxnode* parent;
-	char*			  name;
+	char*			  name; // scientific name
+	char*			  preferred_name; // preferred name
 	bool			  local;
 } ecotx_t;
 
@@ -98,6 +99,7 @@ typedef struct OBIDMS_taxonomy_t {
 	ecomergedidx_t* merged_idx;
 	ecorankidx_t*   ranks;
 	econameidx_t*   names;
+	econameidx_t*   preferred_names;
 	ecotxidx_t*     taxa;
 } OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
 
@@ -127,4 +129,11 @@ int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name
 
 OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
 
-int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
+int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
+
+int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
+
+int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);
+
+
+

From c0bcdce72450a58195b0feddb5da4a44f510cfc7 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Wed, 18 Jan 2017 18:22:49 +0100
Subject: [PATCH 17/22] Taxonomy: documentation for all the functions, and
 fixed bugs when closing the taxonomy (overwriting of .pdx files, missing
 freeing, and re-placed a misplaced condition)

---
 src/obidms_taxonomy.c | 1266 +++++++++++++++++++++++++++--------------
 src/obidms_taxonomy.h |  399 ++++++++++---
 2 files changed, 1179 insertions(+), 486 deletions(-)

diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c
index 9e081f0..aba8641 100644
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@@ -6,7 +6,7 @@
  * @file obidms_taxonomy.c
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  * @date March 2nd 2016
- * @brief Functions for reading binary taxonomy files.
+ * @brief Functions for handling the reading and writing of taxonomy files.
  */
 
 
@@ -29,7 +29,436 @@
 #define DEBUG_LEVEL 0	// TODO has to be defined somewhere else (cython compil flag?)
 
 
-int cmp_rank_labels(const void* label1, const void* label2)
+/**************************************************************************
+ *
+ * D E C L A R A T I O N   O F   T H E   P R I V A T E   F U N C T I O N S
+ *
+ **************************************************************************/
+
+
+/**
+ * @brief Internal function comparing two rank names.
+ *
+ * @param label1 A char* pointer on the first rank name.
+ * @param label2 A char** pointer on a second pointer, that second char* pointer being on the second rank name.
+ * 				 (making the function usable with an ecorankidx_t structure and functions like bsearch)
+ *
+ * @returns A value < 0 if label1 < label2,
+ * 			a value > 0 if label1 > label2,
+ * 			and 0 if label1 == label2.
+ */
+static int cmp_rank_labels(const void* label1, const void* label2);
+
+
+/**
+ * @brief Internal function comparing two taxids, one of them stored in an ecotx_t structure.
+ *
+ * @param ptaxid The first taxid.
+ * @param ptaxon A pointer on an ecotx_t structure where the second taxid is stored.
+ *
+ * @returns A value < 0 if taxid1 < taxid2,
+ * 			a value > 0 if taxid1 > taxid2,
+ * 			and 0 if taxid1 == taxid2.
+ */
+static int cmp_taxids_in_ecotx_t(const void* ptaxid, const void* ptaxon);
+
+
+/**
+ * @brief Internal function comparing two taxids, one of them stored in an ecomerged_t structure.
+ *
+ * @param ptaxid The first taxid.
+ * @param ptaxon A pointer on an ecomerged_t structure where the second taxid is stored.
+ *
+ * @returns A value < 0 if taxid1 < taxid2,
+ * 			a value > 0 if taxid1 > taxid2,
+ * 			and 0 if taxid1 == taxid2.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int cmp_taxids_in_ecomerged_t(const void* ptaxid, const void* ptaxon);
+
+
+/**
+ * @brief Internal function comparing two character strings pointed to by char** pointers.
+ *
+ * @param s1 A char** pointer on a second pointer, that second char* pointer being on the first character string.
+ * @param s2 A char** pointer on a second pointer, that second char* pointer being on the second character string.
+ *
+ * @returns A value < 0 if s1 < s2,
+ * 			a value > 0 if s1 > s2,
+ * 			and 0 if s1 == s2.
+ */
+static int cmp_str(const void* s1, const void* s2);
+
+
+/**
+ * @brief Internal function comparing two taxon names stored in econame_t structures.
+ *
+ * @param n1 A pointer on the first econame_t structure.
+ * @param n2 A pointer on the second econame_t structure.
+ *
+ * @returns A value < 0 if n1 < n2,
+ * 			a value > 0 if n1 > n2,
+ * 			and 0 if n1 == n2.
+ *
+ * @since 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int cmp_names(const void* n1, const void* n2);
+
+
+/**
+ * @brief Internal function returning the ecotx_t structure associated with a taxid.
+ *
+ * This function only looks for the taxid in the modern taxonomy, it does not consider deprecated
+ * and old taxids, unlike obi_taxo_get_taxon_with_taxid().
+ *
+ * @param taxonomy A pointer on the taxonomy structure.
+ * @param taxid The taxid of the taxon wanted.
+ *
+ * @returns A pointer on the ecotx_t structure associated with a taxid.
+ *
+ * @see obi_taxo_get_taxon_with_taxid()
+ */
+static ecotx_t* get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
+
+
+/**
+ * @brief Internal function returning the complete path to a taxonomy directory in a DMS.
+ *
+ * @param dms A pointer on the DMS.
+ * @param tax_name The name of the taxonomy.
+ *
+ * @returns The complete path to the taxonomy directory.
+ * @retval NULL if an error occurred.
+ *
+ * @since 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name);
+
+
+/**
+ * @brief Internal function returning the index of a rank in an ecorankidx_t structure.
+ *
+ * @param label The name of the rank.
+ * @param ranks A pointer on an ecorankidx_t structure.
+ *
+ * @returns The index of a rank in the ecorankidx_t structure.
+ * @retval -1 if the rank was not found.
+ */
+static int32_t rank_index(const char* label, ecorankidx_t* ranks);
+
+
+/**
+ * @brief Internal function opening a binary taxonomy file (.tdx, .rdx, .ndx, .adx, .pdx, .ldx).
+ *
+ * @param file_name The file path.
+ * @param count A pointer on an integer that the function will set to the number of records in the file.
+ * @param abort_on_open_error A boolean indicating whether the function should trigger an error if the file can't be opened.
+ *
+ * @returns The FILE object.
+ * @retval NULL if an error occurred or if the file was not found.
+ */
+static FILE* open_ecorecorddb(const char* file_name, int32_t* count, int32_t abort_on_open_error);
+
+
+/**
+ * @brief Internal function returning the next record in a binary taxonomy file (.tdx, .rdx, .ndx, .adx, .pdx, .ldx).
+ *
+ * @param f The file object with the offset at the start of a record.
+ * @param record_size A pointer on an integer that the function will set to the size of the record.
+ *
+ * @returns A pointer on the read record.
+ * @retval NULL if an error occurred.
+ */
+static void* read_ecorecord(FILE* f, int32_t* record_size);
+
+
+/**
+ * @brief Internal function reading the next taxon record in a .tdx binary taxonomy file.
+ *
+ * @param f The file object with the offset at the start of a record.
+ * @param taxon A pointer on an empty, allocated ecotx_t structure that the function will fill.
+ *
+ * @returns A pointer on the read record.
+ * @retval NULL if an error occurred.
+ */
+static ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon);
+
+
+/**
+ * @brief Internal function reading the next taxon name record in a .ndx binary taxonomy file.
+ *
+ * @param f The file object with the offset at the start of a record.
+ * @param name A pointer on an empty, allocated econame_t structure that the function will fill.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on the read record.
+ * @retval NULL if an error occurred.
+ */
+static econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Internal function reading the next taxon preferred name record in a .pdx binary taxonomy file.
+ *
+ * @param f The file object with the offset at the start of a record.
+ * @param name A pointer on an empty, allocated econame_t structure that the function will fill.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on the read record.
+ * @retval NULL if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Internal function reading a taxonomic ranks (.rdx) binary taxonomy file.
+ *
+ * @param ranks_file_name The name of the .rdx file to read.
+ *
+ * @returns A pointer on an ecorankidx_t structure.
+ * @retval NULL if an error occurred.
+ */
+static ecorankidx_t* read_ranks_idx(const char* ranks_file_name);
+
+
+/**
+ * @brief Internal function reading the taxa (.tdx, .ldx) binary taxonomy file.
+ *
+ * @param taxa_file_name The name of the .tdx file to read.
+ * @param local_taxa_file_name The name of the .ldx file containing the local taxa to read if there is one.
+ *
+ * @returns A pointer on an ecotxidx_t structure.
+ * @retval NULL if an error occurred.
+ */
+static ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name);
+
+
+/**
+ * @brief Internal function reading a names (.ndx) binary taxonomy file.
+ *
+ * @param file_name The name of the .ndx file to read.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on an econameidx_t structure.
+ * @retval NULL if an error occurred.
+ */
+static econameidx_t* read_names_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Internal function reading a preferred names (.pdx) binary taxonomy file.
+ *
+ * @param file_name The name of the .pdx file to read.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on an econameidx_t structure.
+ * @retval NULL if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static econameidx_t* read_preferred_names_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Internal function reading a merged index (.adx) binary taxonomy file.
+ *
+ * @param file_name The name of the .adx file to read.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on an ecomergedidx_t structure.
+ * @retval NULL if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static ecomergedidx_t* read_merged_idx(const char* file_name, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Internal function writing a rank index (.rdx) binary taxonomy file.
+ *
+ * @param dms A pointer on the DMS.
+ * @param tax A pointer on the taxonomy structure.
+ * @param taxonomy_name The name of the taxonomy.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ */
+static int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
+
+
+/**
+ * @brief Internal function writing a taxonomy index (.tdx) binary taxonomy file.
+ *
+ * @param dms A pointer on the DMS.
+ * @param tax A pointer on the taxonomy structure.
+ * @param taxonomy_name The name of the taxonomy.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ */
+static int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
+
+
+/**
+ * @brief Internal function writing a local taxonomy index (.ldx) binary taxonomy file.
+ *
+ * @param dms A pointer on the DMS.
+ * @param tax A pointer on the taxonomy structure.
+ * @param taxonomy_name The name of the taxonomy.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
+
+
+/**
+ * @brief Internal function writing a names index (.ndx) binary taxonomy file.
+ *
+ * @param dms A pointer on the DMS.
+ * @param tax A pointer on the taxonomy structure.
+ * @param taxonomy_name The name of the taxonomy.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ */
+static int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
+
+
+/**
+ * @brief Internal function writing a preferred names index (.pdx) binary taxonomy file.
+ *
+ * @param dms A pointer on the DMS.
+ * @param tax A pointer on the taxonomy structure.
+ * @param taxonomy_name The name of the taxonomy.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
+
+
+/**
+ * @brief Internal function writing a merged index (.adx) binary taxonomy file.
+ *
+ * @param dms A pointer on the DMS.
+ * @param tax A pointer on the taxonomy structure.
+ * @param taxonomy_name The name of the taxonomy.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
+
+
+/**
+ * @brief Internal function reading the 'nodes.dmp' file from an NCBI taxdump.
+ *
+ * @param taxdump The path to the taxdump.
+ * @param tax A pointer on the taxonomy structure.
+ * @param rank_names_p A char*** pointer on a non allocated char* array where the function will store rank names.
+ * @param parent_taxids_p An int** pointer on a non allocated int array where the function will store parent taxids.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int read_nodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, char*** rank_names_p, int** parent_taxids_p);
+
+
+/**
+ * @brief Internal function reading the 'delnodes.dmp' file from an NCBI taxdump.
+ *
+ * @param taxdump The path to the taxdump.
+ * @param tax A pointer on the taxonomy structure.
+ * @param delnodes_p An int32_t** pointer on a non allocated int32_t array where the function will store deleted taxids.
+ * @param delnodes_count An int32_t* pointer where the function will store the number of deleted taxids.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int read_delnodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t** delnodes_p, int32_t* delnodes_count);
+
+
+/**
+ * @brief Internal function reading the 'merged.dmp' file from an NCBI taxdump.
+ *
+ * @warning Should be used AFTER read_nodes_dmp() and read_delnodes_dmp().
+ *
+ * The function merges the information about current nodes previously read in read_nodes_dmp(),
+ * the information about deleted nodes previously read in read_delnodes_dmp(), and the information read
+ * in the 'merged.dmp' file, to build the final merged taxon index in the taxonomy structure.
+ *
+ * @param taxdump The path to the taxdump.
+ * @param tax A pointer on the taxonomy structure.
+ * @param delnodes An int32_t* pointer containing the deleted taxids.
+ * @param delnodes_count The number of deleted taxids.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnodes, int32_t delnodes_count);
+
+
+/**
+ * @brief Internal function reading the 'names.dmp' file from an NCBI taxdump.
+ *
+ * @param taxdump The path to the taxdump.
+ * @param tax A pointer on the taxonomy structure.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int read_names_dmp(const char* taxdump, OBIDMS_taxonomy_p tax);
+
+
+/************************************************************************
+ *
+ * D E F I N I T I O N   O F   T H E   P R I V A T E   F U N C T I O N S
+ *
+ ************************************************************************/
+
+
+static int cmp_rank_labels(const void* label1, const void* label2)		// label1 is read as a string, label2 as a pointer to a string
 {
 	return strcmp((const char*)label1,*(const char**)label2);
 }
@@ -66,7 +495,23 @@ static int cmp_names(const void* n1, const void* n2)
 }
 
 
-char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name)
+static ecotx_t* get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
+{
+	// Binary search of the taxa array. The key handed to cmp_taxids_in_ecotx_t is the taxid value
+	// itself, carried in the pointer argument. NULL is returned when bsearch() finds no match.
+	// NOTE(review): bsearch() needs the taxa array sorted consistently with the comparator - confirm.
+	ecotxidx_t* taxa_index = taxonomy->taxa;
+	const void* key        = (const void *) ((size_t) taxid);
+
+	return (ecotx_t*) bsearch(key,
+	                          (const void *) taxa_index->taxon,
+	                          taxa_index->count,
+	                          sizeof(ecotx_t),
+	                          cmp_taxids_in_ecotx_t);
+}
+
+
+static char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name)
 {
 	char*   all_tax_dir_path;
 	char*   tax_path;
@@ -98,7 +543,7 @@ char* get_taxonomy_path(OBIDMS_p dms, const char* tax_name)
 }
 
 
-int32_t rank_index(const char* label, ecorankidx_t* ranks)
+static int32_t rank_index(const char* label, ecorankidx_t* ranks)
 {
 	char **rep;
 
@@ -111,7 +556,50 @@ int32_t rank_index(const char* label, ecorankidx_t* ranks)
 }
 
 
-void* read_ecorecord(FILE* f, int32_t* record_size)
+static FILE* open_ecorecorddb(const char* file_name,
+                       int32_t*    count,
+                       int32_t     abort_on_open_error)
+{
+	// Opens file_name in binary mode and reads its leading int32_t into *count.
+	// Returns the stream, positioned right after that value, or NULL on failure.
+	// A failed open only sets *count to 0 when abort_on_open_error is 0; otherwise it raises OBI_TAXONOMY_ERROR.
+	FILE*		f;
+	size_t      read;
+
+	f = fopen(file_name, "rb");
+
+	if (f == NULL)
+	{
+		// No fclose() on this path: f is NULL and fclose(NULL) is undefined behavior
+		if (abort_on_open_error)
+		{
+			obi_set_errno(OBI_TAXONOMY_ERROR);
+			obidebug(1, "\nCouldn't open a taxonomy file");
+		}
+		else
+			*count = 0;
+
+		return NULL;
+	}
+
+	read = fread(count,
+	             sizeof(int32_t),
+	             1,
+	             f);
+
+	if (read != 1)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError reading taxonomy record size");
+		fclose(f);
+		return NULL;
+	}
+
+	return f;
+}
+
+
+static void* read_ecorecord(FILE* f, int32_t* record_size)
 {
 	static void* buffer = NULL;
 	int32_t      buffer_size = 0;
@@ -174,7 +662,7 @@ void* read_ecorecord(FILE* f, int32_t* record_size)
 };
 
 
-ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon)
+static ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon)
 {
 	ecotxformat_t* raw;
 	int32_t        record_length;
@@ -203,50 +691,100 @@ ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon)
 }
 
 
-FILE* open_ecorecorddb(const char* file_name,
-                       int32_t*    count,
-                       int32_t     abort_on_open_error)
+static econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
 {
-    FILE*		f;
-	int32_t     read;
+	econameformat_t* raw;
+	int32_t          record_length;
 
-	f = fopen(file_name, "rb");
+	raw = read_ecorecord(f, &record_length);
+	if (raw == NULL)
+ 		return NULL;
 
-	if (!f)
+	name->is_scientific_name = raw->is_scientific_name;
+	// raw->names holds the name, followed by the class name; each is copied and NUL-terminated below
+	name->name = malloc((raw->name_length + 1) * sizeof(char));
+	if (name->name == NULL)
 	{
-		if (abort_on_open_error)
-		{
-	 		obi_set_errno(OBI_TAXONOMY_ERROR);
-	 		obidebug(1, "\nCouldn't open a taxonomy file");
-	 		fclose(f);
-	 		return NULL;
-		}
-	 	else
-	 	{
-	 		*count = 0;
-	 		fclose(f);
-	 		return NULL;
-	 	}
-	}
-
-	read = fread(count,
-	      		 sizeof(int32_t),
-				 1,
-	      		 f);
-
-	if (read != 1)
-	{
- 		obi_set_errno(OBI_TAXONOMY_ERROR);
- 		obidebug(1, "\nError reading taxonomy record size");
- 		fclose(f);
+ 		obi_set_errno(OBI_MALLOC_ERROR);
+ 		obidebug(1, "\nError allocating memory for a taxon name");
+ 		free(raw);		// NOTE(review): read_ecorecord() declares a static buffer; if raw points to it, freeing it here leaves it dangling - confirm
  		return NULL;
 	}
+	strncpy(name->name, raw->names, raw->name_length);
+	name->name[raw->name_length] = 0;
 
-	return f;
+	name->class_name = malloc((raw->class_length+1) * sizeof(char));
+	if (name->class_name == NULL)
+	{
+ 		obi_set_errno(OBI_MALLOC_ERROR);
+ 		obidebug(1, "\nError allocating memory for a taxon class name");
+ 		free(name->name);
+ 		free(raw);
+ 		return NULL;
+	}
+	strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length);
+	name->class_name[raw->class_length] = 0;
+
+	name->taxon = taxonomy->taxa->taxon + raw->taxid;		// raw->taxid is used as an index in the taxa array
+
+	return name;
 }
 
 
-ecorankidx_t* read_ranks_idx(const char* ranks_file_name)
+static econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
+{
+	econameformat_t* record;
+	ecotx_t*         taxon;
+	int32_t          record_length;
+
+	record = read_ecorecord(f, &record_length);
+	if (!record)
+		return NULL;
+
+	name->is_scientific_name = record->is_scientific_name;
+	// record->names holds the name followed by the class name; each is copied and NUL-terminated
+	name->name = malloc((record->name_length + 1) * sizeof(char));
+	if (!name->name)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon preferred name");
+		free(record);
+		return NULL;
+	}
+	strncpy(name->name, record->names, record->name_length);
+	name->name[record->name_length] = '\0';
+
+	name->class_name = malloc((record->class_length + 1) * sizeof(char));
+	if (!name->class_name)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon class name");
+		free(name->name);
+		free(record);
+		return NULL;
+	}
+	strncpy(name->class_name, record->names + record->name_length, record->class_length);
+	name->class_name[record->class_length] = '\0';
+
+	name->taxon = taxon = taxonomy->taxa->taxon + record->taxid;	// record->taxid is used as an index in the taxa array
+	// The taxon structure gets its own copy of the preferred name 	// TODO discuss: couldn't they all use the same pointer?
+	taxon->preferred_name = malloc((record->name_length + 1) * sizeof(char));
+	if (!taxon->preferred_name)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon preferred name");
+		free(name->name);
+		free(name->class_name);
+		free(record);
+		return NULL;
+	}
+	strcpy(taxon->preferred_name, name->name);
+
+	return name;
+}
+
+
+static ecorankidx_t* read_ranks_idx(const char* ranks_file_name)
 {
 	int32_t      			count;
 	FILE*        			ranks_file;
@@ -301,7 +839,7 @@ ecorankidx_t* read_ranks_idx(const char* ranks_file_name)
 }
 
 
-ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name)
+static ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa_file_name)
 {
 	int32_t      	  count_taxa;
 	int32_t      	  count_local_taxa;
@@ -394,100 +932,7 @@ ecotxidx_t* read_taxonomy_idx(const char* taxa_file_name, const char* local_taxa
 }
 
 
-econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
-{
-	econameformat_t* raw;
-	int32_t          record_length;
-
-	raw = read_ecorecord(f, &record_length);
-	if (raw == NULL)
- 		return NULL;
-
-	name->is_scientific_name = raw->is_scientific_name;
-
-	name->name = malloc((raw->name_length + 1) * sizeof(char));
-	if (name->name == NULL)
-	{
- 		obi_set_errno(OBI_MALLOC_ERROR);
- 		obidebug(1, "\nError allocating memory for a taxon name");
- 		free(raw);
- 		return NULL;
-	}
-	strncpy(name->name, raw->names, raw->name_length);
-	name->name[raw->name_length] = 0;
-
-	name->class_name = malloc((raw->class_length+1) * sizeof(char));
-	if (name->class_name == NULL)
-	{
- 		obi_set_errno(OBI_MALLOC_ERROR);
- 		obidebug(1, "\nError allocating memory for a taxon class name");
- 		free(name->name);
- 		free(raw);
- 		return NULL;
-	}
-	strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length);
-	name->class_name[raw->class_length] = 0;
-
-	name->taxon = taxonomy->taxa->taxon + raw->taxid;
-
-	return name;
-}
-
-
-econame_t* readnext_ecopreferredname(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
-{
-	econameformat_t* raw;
-	int32_t          record_length;
-
-	raw = read_ecorecord(f, &record_length);
-	if (raw == NULL)
- 		return NULL;
-
-	name->is_scientific_name = raw->is_scientific_name;
-
-	name->name = malloc((raw->name_length + 1) * sizeof(char));
-	if (name->name == NULL)
-	{
- 		obi_set_errno(OBI_MALLOC_ERROR);
- 		obidebug(1, "\nError allocating memory for a taxon preferred name");
- 		free(raw);
- 		return NULL;
-	}
-	strncpy(name->name, raw->names, raw->name_length);
-	name->name[raw->name_length] = 0;
-
-	name->class_name = malloc((raw->class_length+1) * sizeof(char));
-	if (name->class_name == NULL)
-	{
- 		obi_set_errno(OBI_MALLOC_ERROR);
- 		obidebug(1, "\nError allocating memory for a taxon class name");
- 		free(name->name);
- 		free(raw);
- 		return NULL;
-	}
-	strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length);
-	name->class_name[raw->class_length] = 0;
-
-	name->taxon = taxonomy->taxa->taxon + raw->taxid;
-
-	// Add the preferred name in the taxon structure 	// TODO discuss: couldn't they all use the same pointer?
-	(taxonomy->taxa->taxon + raw->taxid)->preferred_name = malloc((raw->name_length + 1) * sizeof(char));
-	if ((taxonomy->taxa->taxon + raw->taxid)->preferred_name == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError allocating memory for a taxon preferred name");
-		free(name->name);
-		free(name->class_name);
-		free(raw);
-		return NULL;
-	}
-	strcpy((taxonomy->taxa->taxon + raw->taxid)->preferred_name, name->name);
-
-	return name;
-}
-
-
-econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
+static econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 {
 	int32_t      		count;
 	FILE*				f;
@@ -526,7 +971,7 @@ econameidx_t* read_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 }
 
 
-econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
+static econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 {
 	int32_t      		count;
 	FILE*				f;
@@ -565,7 +1010,7 @@ econameidx_t* read_preferred_names_idx(const char *file_name, OBIDMS_taxonomy_p
 }
 
 
-ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
+static ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
 {
 	int32_t      		count;
 	FILE*				f;
@@ -610,15 +1055,7 @@ ecomergedidx_t* read_merged_idx(const char *file_name, OBIDMS_taxonomy_p taxonom
 }
 
 
-
-
-
-
-
-
-// Functions to write taxonomy structure to binary files
-
-int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
+static int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
 {
 	int     i;
 	char* 	file_name;
@@ -721,7 +1158,7 @@ int write_ranks_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_na
 }
 
 
-int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
+static int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
 {
 	int     i;
 	char* 	file_name;
@@ -858,7 +1295,7 @@ int write_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy
 }
 
 
-int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
+static int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
 {
 	int     i;
 	char* 	file_name;
@@ -995,7 +1432,7 @@ int write_local_taxonomy_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* ta
 }
 
 
-int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
+static int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
 {
 	int     i;
 	char* 	file_name;
@@ -1143,7 +1580,7 @@ int write_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_na
 }
 
 
-int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
+static int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct? keep argument but if NULL, use the one in struct?
 {
 	int     i;
 	char* 	file_name;
@@ -1185,7 +1622,7 @@ int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* t
 	free(taxonomy_path);
 
 	// Create file
-	file_descriptor = open(file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
+	file_descriptor = open(file_name, O_RDWR | O_CREAT, 0777);
 	if (file_descriptor < 0)
 	{
 		obi_set_errno(OBI_TAXONOMY_ERROR);
@@ -1291,7 +1728,7 @@ int write_preferred_names_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* t
 }
 
 
-int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
+static int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name)		// TODO prefix in taxonomy struct?
 {
 	int     i;
 	char* 	file_name;
@@ -1399,48 +1836,6 @@ int write_merged_idx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_n
 }
 
 
-int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name)
-{
-	char* taxonomy_path;
-
-	// Build the taxonomy directory path
-	taxonomy_path = get_taxonomy_path(dms, tax_name);
-	if (taxonomy_path == NULL)
-		return -1;
-
-	// Try to create the directory
-	if (mkdir(taxonomy_path, 00777) < 0)
-	{
-		if (errno == EEXIST)
-			obidebug(1, "\nA taxonomy already exists with this name.");
-		obidebug(1, "\nProblem creating a new taxonomy directory");
-		free(taxonomy_path);
-		return -1;
-	}
-
-	free(taxonomy_path);
-
-    if (write_ranks_idx(dms, tax, tax_name) < 0)
-        return -1;
-    if (write_taxonomy_idx(dms, tax, tax_name) < 0)
-        return -1;
-    if (write_names_idx(dms, tax, tax_name) < 0)
-    	return -1;
-    if (write_merged_idx(dms, tax, tax_name) < 0)
-    	return -1;
-    // Check if there are local taxa (if so last taxon is local)
-    if ((tax->taxa)->local_count > 0)
-    	if (write_local_taxonomy_idx(dms, tax, tax_name) < 0)
-    		return -1;
-    // Write preferred names if there are some
-    if (tax->preferred_names != NULL)
-    	if (write_preferred_names_idx(dms, tax, tax_name) < 0)
-    		return -1;
-	return 0;
-}
-
-
-
 int read_nodes_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, 	char***	rank_names_p, int** parent_taxids_p)
 {
 	struct dirent* 	dp;
@@ -2049,7 +2444,7 @@ int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnode
 
 					// Store the deprecated taxid with the index that refers to the new taxid
 						// Find the index of the new taxid
-				t = obi_taxo_get_taxon_with_current_taxid(tax, taxid);
+				t = get_taxon_with_current_taxid(tax, taxid);
 						// Store the old taxid with the index
 				(tax->merged_idx)->merged[n].taxid = old_taxid;
 				(tax->merged_idx)->merged[n].idx = t->idx;
@@ -2335,6 +2730,13 @@ int read_names_dmp(const char* taxdump, OBIDMS_taxonomy_p tax)
 }
 
 
+/**********************************************************************
+ *
+ * D E F I N I T I O N   O F   T H E   P U B L I C   F U N C T I O N S
+ *
+ **********************************************************************/
+
+
 OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 {
 	OBIDMS_taxonomy_p tax;
@@ -2524,7 +2926,7 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 	// Associate the taxa with their parent
 	for (i=0; i < (tax->taxa)->count; i++)
 	{
-		((tax->taxa)->taxon)[i].parent = obi_taxo_get_taxon_with_current_taxid(tax, parent_taxids[i]);
+		((tax->taxa)->taxon)[i].parent = get_taxon_with_current_taxid(tax, parent_taxids[i]);
 		if (((tax->taxa)->taxon)[i].parent == NULL)
 		{
 			obi_set_errno(OBI_TAXONOMY_ERROR);
@@ -2574,192 +2976,6 @@ OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump)
 }
 
 
-int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
-{
-	int32_t    taxid;
-	ecotx_t*   taxon;
-	int        i;
-//	econame_t* name_struct;
-
-	// Enlarge the structure memory for a new taxon
-	tax->taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (((tax->taxa)->count) + 1));
-	if (tax->taxa == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
-		return -1;
-	}
-
-	// Compute new taxid that must be equal or greater than 1E7 and greater than the maximum taxid existing in the taxonomy
-	if (min_taxid < MIN_LOCAL_TAXID)
-		min_taxid = MIN_LOCAL_TAXID;
-	if (min_taxid > (tax->taxa)->max_taxid)
-		taxid = min_taxid;
-	else
-		taxid = ((tax->taxa)->max_taxid) + 1;
-
-	// Fill the ecotx_t node structure
-	taxon = ((tax->taxa)->taxon)+((tax->taxa)->count);
-	taxon->taxid = taxid;
-	taxon->idx = (tax->taxa)->count;
-	taxon->local = true;
-	taxon->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
-	if (taxon->name == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
-		return -1;
-	}
-	strcpy(taxon->name, name);
-	taxon->rank = -1;
-	for (i=0; i < (tax->ranks)->count; i++)
-	{
-		if (strcmp(rank_name, ((tax->ranks)->label)[i]) == 0)
-		{
-			taxon->rank = i;
-			break;
-		}
-	}
-	if (taxon->rank == -1)
-	{
-		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError: taxon rank not found when adding a new taxon");
-		return -1;
-	}
-	taxon->parent = obi_taxo_get_taxon_with_taxid(tax, parent_taxid);
-	if (taxon->parent == NULL)
-	{
-		obi_set_errno(OBI_TAXONOMY_ERROR);
-		obidebug(1, "\nError: taxon parent not found when adding a new taxon");
-		return -1;
-	}
-	taxon->farest = 0;
-
-	// Update taxonomy counts etc
-	(tax->taxa)->max_taxid = taxid;
-	((tax->taxa)->count)++;
-	((tax->taxa)->local_count)++;
-	(tax->taxa)->buffer_size = (tax->taxa)->count;
-
-//	// Add new name in names structure     // Commented because the new name was not added in the .ndx file in the OBITools1
-//	// Allocate memory for new name
-//	tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1));
-//	if (tax->names == NULL)
-//	{
-//		obi_set_errno(OBI_MALLOC_ERROR);
-//		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
-//		return -1;
-//	}
-//
-//	// Add new name
-//	name_struct = (tax->names)->names + ((tax->names)->count);
-//	name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
-//	if (name_struct->name == NULL)
-//	{
-//		obi_set_errno(OBI_MALLOC_ERROR);
-//		obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
-//		return -1;
-//	}
-//	strcpy(name_struct->name, name);
-//	name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char));
-//	if (name_struct->class_name == NULL)
-//	{
-//		obi_set_errno(OBI_MALLOC_ERROR);
-//		obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon");
-//		return -1;
-//	}
-//	strcpy(name_struct->class_name, "scientific name");
-//	name_struct->is_scientific_name = true;
-//	name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1;
-//
-//	// Sort names in alphabetical order
-//	qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names);
-//
-//	// Update name count
-//	((tax->names)->count)++;
-
-	return taxid;
-}
-
-
-int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
-{
-	ecotx_t* taxon;
-
-	taxon = obi_taxo_get_taxon_with_taxid(tax, taxid);
-
-	return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name);
-}
-
-
-int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
-{
-	econame_t* name_struct;
-
-	// Free previous preferred name if there is one
-	if (taxon->preferred_name != NULL)
-		free(taxon->preferred_name);
-
-	taxon->preferred_name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char));
-	if (taxon->preferred_name == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError allocating memory for a new preferred name for a taxon");
-		return -1;
-	}
-	strcpy(taxon->preferred_name, preferred_name);
-
-	// Add new name in preferred names structure
-	// Allocate or reallocate memory for new name
-	if (tax->preferred_names == NULL)
-	{
-		tax->preferred_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t));
-		(tax->preferred_names)->count = 0;
-	}
-	else
-		tax->preferred_names = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->preferred_names)->count + 1));
-	if (tax->preferred_names == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name");
-		return -1;
-	}
-
-	// Add new preferred name
-	name_struct = (tax->preferred_names)->names + ((tax->preferred_names)->count);
-	name_struct->name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char));
-	if (name_struct->name == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError allocating memory for a new taxon preferred name");
-		return -1;
-	}
-	strcpy(name_struct->name, preferred_name);
-
-	name_struct->class_name = (char*) malloc((strlen("preferred name") + 1) * sizeof(char));
-	if (name_struct->class_name == NULL)
-	{
-		obi_set_errno(OBI_MALLOC_ERROR);
-		obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name");
-		return -1;
-	}
-	strcpy(name_struct->class_name, "preferred name");
-	name_struct->is_scientific_name = false;
-	name_struct->taxon = taxon;
-
-	// Sort preferred names in alphabetical order
-	qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names);
-
-	// Update preferred name count
-	((tax->preferred_names)->count)++;
-
-	return 0;
-}
-
-
-/////// PUBLIC /////////
-
-
 OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names)
 {
 	OBIDMS_taxonomy_p  tax;
@@ -2977,28 +3193,82 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 }
 
 
+int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name)
+{
+	char* tax_dir_path;
+	int   mkdir_failed;
+
+	// Compute the path of the taxonomy directory
+	tax_dir_path = get_taxonomy_path(dms, tax_name);
+	if (!tax_dir_path)
+		return -1;
+
+	// Create the directory, reporting the failure if it can't be created
+	mkdir_failed = (mkdir(tax_dir_path, 00777) < 0);
+	if (mkdir_failed)
+	{
+		if (errno == EEXIST)
+			obidebug(1, "\nA taxonomy already exists with this name.");
+		obidebug(1, "\nProblem creating a new taxonomy directory");
+	}
+	free(tax_dir_path);
+	if (mkdir_failed)
+		return -1;
+
+	// Index files that are always written, in this order: ranks, taxa, names, merged taxids
+	if ((write_ranks_idx(dms, tax, tax_name) < 0) ||
+	    (write_taxonomy_idx(dms, tax, tax_name) < 0) ||
+	    (write_names_idx(dms, tax, tax_name) < 0) ||
+	    (write_merged_idx(dms, tax, tax_name) < 0))
+		return -1;
+
+	// Local taxa index, only written when the taxonomy contains local taxa
+	if (((tax->taxa)->local_count > 0) && (write_local_taxonomy_idx(dms, tax, tax_name) < 0))
+		return -1;
+
+	// Preferred names index, only written when there are preferred names
+	if ((tax->preferred_names != NULL) && (write_preferred_names_idx(dms, tax, tax_name) < 0))
+		return -1;
+
+	return 0;
+}
+
+
 int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
 {
 	int i;
 
-	// Update local informations (local taxa and preferred names) if there are any
-	if ((taxonomy->taxa)->local_count > 0)
-	{
-		if (taxonomy->dms == NULL)
-		{
-	 		obi_set_errno(OBI_TAXONOMY_ERROR);
-	 		obidebug(1, "\nError closing a taxonomy with local files but no DMS associated (probably read directly from taxdump)");		// TODO discuss
-		}
-		if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
-			return -1;
-	}
-    // Write preferred names if there are some
-    if (taxonomy->preferred_names != NULL)
-    	if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
-    		return -1;
-
 	if (taxonomy)
 	{
+		// Update local informations (local taxa and preferred names) if there are any
+		if ((taxonomy->taxa)->local_count > 0)
+		{
+			if (taxonomy->dms == NULL)
+			{
+				obi_set_errno(OBI_TAXONOMY_ERROR);
+				obidebug(1, "\nError closing a taxonomy with local files but no DMS associated (probably read directly from taxdump)");		// TODO discuss
+			}
+			if (write_local_taxonomy_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
+				return -1;
+		}
+
+		// Write preferred names if there are some
+		if (taxonomy->preferred_names)
+		{
+			if (write_preferred_names_idx(taxonomy->dms, taxonomy, taxonomy->tax_name) < 0)
+				return -1;
+
+			// Free preferred names
+			for (i=0; i < (taxonomy->preferred_names)->count; i++)
+			{
+				if (((taxonomy->preferred_names)->names[i]).name)
+					free(((taxonomy->preferred_names)->names[i]).name);
+				if (((taxonomy->preferred_names)->names[i]).class_name)
+					free(((taxonomy->preferred_names)->names[i]).class_name);
+			}
+			free(taxonomy->preferred_names);
+		}
+
 		if (taxonomy->ranks)
 		{
 			for (i=0; i < (taxonomy->ranks)->count; i++)
@@ -3043,7 +3313,187 @@ int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
 }
 
 
-//////////////////////////////////////////////////////////////////////////
+int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
+{
+	int32_t    taxid;
+	ecotx_t*   taxon;
+	int        i;
+//	econame_t* name_struct;
+
+	// Enlarge the structure memory for a new taxon, through a temporary pointer so that the taxonomy stays usable if realloc fails
+	// NOTE(review): if realloc moves the block, pointers kept into the old taxon array (taxon->parent, econame_t->taxon) would dangle -- TODO confirm
+	ecotxidx_t* new_taxa = (ecotxidx_t*) realloc(tax->taxa, sizeof(ecotxidx_t) + sizeof(ecotx_t) * (((tax->taxa)->count) + 1));
+	if (new_taxa == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
+		return -1;
+	}
+	tax->taxa = new_taxa;
+
+	// Compute new taxid that must be equal or greater than 1E7 and greater than the maximum taxid existing in the taxonomy
+	if (min_taxid < MIN_LOCAL_TAXID)
+		min_taxid = MIN_LOCAL_TAXID;
+	taxid = (min_taxid > (tax->taxa)->max_taxid) ? min_taxid : ((tax->taxa)->max_taxid) + 1;
+
+	// Fill the ecotx_t node structure (the name is allocated last so that the error returns below do not leak it)
+	taxon = ((tax->taxa)->taxon)+((tax->taxa)->count);
+	taxon->taxid = taxid;
+	taxon->idx = (tax->taxa)->count;
+	taxon->local = true;
+	taxon->farest = 0;
+	taxon->preferred_name = NULL;	// The new slot is uninitialized memory, and this pointer is tested and freed when a preferred name is added
+	taxon->rank = -1;
+	for (i=0; i < (tax->ranks)->count; i++)
+	{
+		if (strcmp(rank_name, ((tax->ranks)->label)[i]) == 0)
+		{
+			taxon->rank = i;
+			break;
+		}
+	}
+	if (taxon->rank == -1)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError: taxon rank not found when adding a new taxon");
+		return -1;
+	}
+	taxon->parent = obi_taxo_get_taxon_with_taxid(tax, parent_taxid);
+	if (taxon->parent == NULL)
+	{
+		obi_set_errno(OBI_TAXONOMY_ERROR);
+		obidebug(1, "\nError: taxon parent not found when adding a new taxon");
+		return -1;
+	}
+	taxon->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
+	if (taxon->name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
+		return -1;
+	}
+	strcpy(taxon->name, name);
+
+	// Update taxonomy counts etc
+	(tax->taxa)->max_taxid = taxid;
+	((tax->taxa)->count)++;
+	((tax->taxa)->local_count)++;
+	(tax->taxa)->buffer_size = (tax->taxa)->count;
+
+//	// Add new name in names structure     // Commented because the new name was not added in the .ndx file in the OBITools1
+//	// Allocate memory for new name
+//	tax->names = (econameidx_t*) realloc(tax->names, sizeof(econameidx_t) + sizeof(econame_t) * ((tax->names)->count + 1));
+//	if (tax->names == NULL)
+//	{
+//		obi_set_errno(OBI_MALLOC_ERROR);
+//		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new taxon");
+//		return -1;
+//	}
+//
+//	// Add new name
+//	name_struct = (tax->names)->names + ((tax->names)->count);
+//	name_struct->name = (char*) malloc((strlen(name) + 1) * sizeof(char));
+//	if (name_struct->name == NULL)
+//	{
+//		obi_set_errno(OBI_MALLOC_ERROR);
+//		obidebug(1, "\nError allocating memory for a taxon name to add a new taxon");
+//		return -1;
+//	}
+//	strcpy(name_struct->name, name);
+//	name_struct->class_name = (char*) malloc((strlen("scientific name") + 1) * sizeof(char));
+//	if (name_struct->class_name == NULL)
+//	{
+//		obi_set_errno(OBI_MALLOC_ERROR);
+//		obidebug(1, "\nError allocating memory for a taxon class name to add a new taxon");
+//		return -1;
+//	}
+//	strcpy(name_struct->class_name, "scientific name");
+//	name_struct->is_scientific_name = true;
+//	name_struct->taxon = ((tax->taxa)->taxon) + ((tax->taxa)->count) - 1;
+//
+//	// Sort names in alphabetical order
+//	qsort((tax->names)->names, (tax->names)->count, sizeof(econame_t), cmp_names);
+//
+//	// Update name count
+//	((tax->names)->count)++;
+
+	return taxid;	// On success the taxid given to the new taxon is returned (not 0)
+}
+
+
+int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
+{
+	// An unknown taxid yields a NULL taxon: report the error here instead of letting it be dereferenced downstream
+	ecotx_t* taxon = obi_taxo_get_taxon_with_taxid(tax, taxid);
+	if (taxon == NULL)
+		{ obi_set_errno(OBI_TAXONOMY_ERROR); return -1; }
+	return obi_taxo_add_preferred_name_with_taxon(tax, taxon, preferred_name);
+}
+
+
+// Sets the preferred name of a taxon and records it in the preferred names index of the taxonomy (0 on success, -1 on error)
+int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
+{
+	econame_t*    name_struct;
+	econameidx_t* new_index;
+	int32_t       name_count;
+
+	// Free previous preferred name if there is one (free(NULL) is a no-op)
+	free(taxon->preferred_name);
+	taxon->preferred_name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char));
+	if (taxon->preferred_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a new preferred name for a taxon");
+		return -1;
+	}
+	strcpy(taxon->preferred_name, preferred_name);
+
+	// Add new name in preferred names structure
+	// Allocate or reallocate memory for new name: realloc() on a NULL pointer behaves like malloc(), and the
+	// temporary pointer keeps the existing index (and avoids dereferencing NULL) if the allocation fails
+	name_count = (tax->preferred_names == NULL) ? 0 : (tax->preferred_names)->count;
+	new_index = (econameidx_t*) realloc(tax->preferred_names, sizeof(econameidx_t) + sizeof(econame_t) * (name_count + 1));
+	if (new_index == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError reallocating memory for a taxonomy structure to add a new preferred name");
+		return -1;
+	}
+	new_index->count = name_count;
+	tax->preferred_names = new_index;
+
+	// Add new preferred name
+	name_struct = (tax->preferred_names)->names + name_count;
+	name_struct->name = (char*) malloc((strlen(preferred_name) + 1) * sizeof(char));
+	if (name_struct->name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a new taxon preferred name");
+		return -1;
+	}
+	strcpy(name_struct->name, preferred_name);
+
+	name_struct->class_name = (char*) malloc((strlen("preferred name") + 1) * sizeof(char));
+	if (name_struct->class_name == NULL)
+	{
+		free(name_struct->name);	// The entry is not counted yet, so nothing else would ever free this copy
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a taxon class name to add a new preferred name");
+		return -1;
+	}
+	strcpy(name_struct->class_name, "preferred name");
+	name_struct->is_scientific_name = false;
+	name_struct->taxon = taxon;
+
+	// Update preferred name count before sorting, so that the new name is part of the sorted range
+	((tax->preferred_names)->count)++;
+
+	// Sort preferred names in alphabetical order
+	qsort((tax->preferred_names)->names, (tax->preferred_names)->count, sizeof(econame_t), cmp_names);
+
+	return 0;
+}
 
 
 ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
@@ -3068,22 +3518,6 @@ ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
 }
 
 
-ecotx_t* obi_taxo_get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)	// TODO discuss keeping private?
-{
-	ecotx_t *current_taxon;
-	int32_t  count;
-
-	count = (taxonomy->taxa)->count;
-
-	current_taxon = (ecotx_t*) bsearch((const void *) ((size_t) taxid),
-                                       (const void *) taxonomy->taxa->taxon,
-                                       count,
-                                       sizeof(ecotx_t),
-									   cmp_taxids_in_ecotx_t);
-	return current_taxon;
-}
-
-
 ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
 {
 	ecotx_t     *current_taxon;
@@ -3234,5 +3668,3 @@ ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
 	return obi_taxo_get_parent_at_rank(taxon, rankindex);
 }
 
-
-
diff --git a/src/obidms_taxonomy.h b/src/obidms_taxonomy.h
index fa2f511..dcce499 100644
--- a/src/obidms_taxonomy.h
+++ b/src/obidms_taxonomy.h
@@ -6,7 +6,7 @@
  * @file obidms_taxonomy.h
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  * @date March 2nd 2016
- * @brief Header file for the functions handling the reading of binary taxonomy files.
+ * @brief Header file for the functions handling the reading and writing of taxonomy files.
  */
 
 
@@ -17,123 +17,384 @@
 #include "obidms.h"
 
 
-#define MIN_LOCAL_TAXID (10000000)
-#define TAX_NAME_LEN (1024)
+#define MIN_LOCAL_TAXID (10000000)     	  /**< The minimum taxid for a taxon added locally (i.e. not an NCBI taxon).
+                                	       */
+#define TAX_NAME_LEN (1024)				  /**< The maximum length for the taxonomy name.
+                                	       */
 
 
+/**
+ * @brief Structure for a taxon as stored in a .tdx file.
+ */
 typedef struct {
-	int32_t  taxid;
-	int32_t  rank;
-	int32_t	 parent;
-	int32_t  name_length;
-	char     name[];
+	int32_t  taxid;        /**< Taxid.
+	 	 	    		    */
+	int32_t  rank;         /**< Rank index.
+	    				    */
+	int32_t	 parent;       /**< Index, in the taxid index, of the parent node in the taxonomic tree.
+	    					*/
+	int32_t  name_length;  /**< Length of the taxon scientific name.
+							*/
+	char     name[];	   /**< Scientific name of the taxon.
+							*/
 } ecotxformat_t;
 
 
+/**
+ * @brief Structure for a taxon as stored in a taxonomy structure.
+ */
 typedef struct ecotxnode {
-	int32_t           taxid;	// TODO discuss that this is will be the current taxid even if the struct was accessed through a deprecated one
-	int32_t           rank;
-	int32_t  		  farest;
-	int32_t			  idx;
-	struct ecotxnode* parent;
-	char*			  name; // scientific name
-	char*			  preferred_name; // preferred name
-	bool			  local;
+	int32_t           taxid;		 	/**< Taxid.		// TODO discuss that this is will be the current taxid even if the struct was accessed through a deprecated one
+	    								 */
+	int32_t           rank;		        /**< Rank index in ecorankidx_t structure.
+	 	 	 	 	 	 	 	 	 	 */
+	int32_t  		  farest;		    /**< Longest branch length, used to compute distances between taxa faster.
+	 	 	 	 	 	 	 	 	 	 */
+	int32_t			  idx;			    /**< Index in the ecotxidx_t structure.
+	 	 	 	 	 	 	 	 	 	 */
+	struct ecotxnode* parent;			/**< Pointer on the parent node in the taxonomic tree.
+	 	 	 	 	 	 	 	 	 	 */
+	char*			  name;		   		/**< Scientific name of the taxon.
+										 */
+	char*			  preferred_name;	/**< Preferred name of the taxon if there is one, otherwise NULL.
+										 */
+	bool			  local;			/**< A boolean indicating whether the taxon is local or not.
+	 	 	 	 	 	 	 	 	 	 */
 } ecotx_t;
 
 
+/**
+ * @brief Structure for the taxon index in a taxonomy structure.
+ */
 typedef struct {
-	int32_t count;
-	int32_t ncbi_count;
-	int32_t local_count;
-	int32_t max_taxid;
-	int32_t buffer_size;
-	ecotx_t taxon[];
+	int32_t count;			/**< Number of taxa.
+	 	 	 	 	 	 	 */
+	int32_t ncbi_count;		/**< Number of NCBI taxa.
+	 	 	 	 	 	 	 */
+	int32_t local_count;	/**< Number of taxa added locally.
+ 	 	 	 	 	 	 	 */
+	int32_t max_taxid;		/**< Maximum taxid existing in the taxon index.
+	 	 	 	 	 	 	 */
+	int32_t buffer_size;	/**< Number of taxa.	// TODO kept this but not sure of its use
+ 	 	 	 	 	 	 	 */
+	ecotx_t taxon[];		/**< Taxon array.
+ 	 	 	 	 	 	 	 */
 } ecotxidx_t;
 
 
+/**
+ * @brief Structure for the rank index in a taxonomy structure.
+ */
 typedef struct {
-	int32_t count;
-	char*   label[];
+	int32_t count;		/**< Number of ranks.
+ 	 	 	 	 	 	 */
+	char*   label[];	/**< Array of rank names.
+	 	 	 	 	 	 */
 } ecorankidx_t;
 
 
+/**
+ * @brief Structure for a taxon name as stored in a .ndx file.
+ */
 typedef struct {
- 	int32_t  is_scientific_name;
-	int32_t  name_length;
-	int32_t  class_length;
-	int32_t  taxid;	// taxid idx
-	char     names[];
+ 	int32_t  is_scientific_name;	/**< A boolean indicating whether the name is a scientific name or not.
+	 	 	 	 	 	 	 	 	 */
+	int32_t  name_length;			/**< The name length.
+	 	 	 	 	 	 	 	 	 */
+	int32_t  class_length;			/**< The name class length.
+	 	 	 	 	 	 	 	 	 */
+	int32_t  taxid;       			/**< Index of the taxon in the taxid index.
+									 */
+	char     names[];       		/**< Taxon name and name class concatenated.
+	 	 	 	 	 	 	 	 	 */
 } econameformat_t;
 
 
+/**
+ * @brief Structure for a taxon name as stored in a taxonomy structure.
+ */
 typedef struct {
-	char*   		  name;
-	char*   		  class_name;
-	int32_t 		  is_scientific_name;
-	struct ecotxnode* taxon;
+	char*   		  name;					/**< Taxon name.
+	 	 	 	 	 	 	 	 	 	 	 */
+	char*   		  class_name;			/**< Name class.
+	 	 	 	 	 	 	 	 	 	 	 */
+	int32_t 		  is_scientific_name;	/**< A boolean indicating whether the name is a scientific name or not.
+	 	 	 	 	 	 	 	 	 	 	 */
+	struct ecotxnode* taxon;				/**< Pointer on the taxon in the taxon index.
+	 	 	 	 	 	 	 	 	 	 	 */
 } econame_t;
 
 
+/**
+ * @brief Structure for the name index in a taxonomy structure.
+ */
 typedef struct {
-	int32_t   count;
-	econame_t names[];
+	int32_t   count;		/**< Number of names.
+	 	 	 	 	 	 	 */
+	econame_t names[];		/**< Array of names.
+	 	 	 	 	 	 	 */
 } econameidx_t;
 
 
+/**
+ * @brief Structure for a taxid/index pair as stored in a taxonomy structure.
+ */
 typedef struct {
-	int32_t taxid;
-	int32_t idx;
+	int32_t taxid;		/**< Taxid.
+	 	 	 	 	 	 */
+	int32_t idx;		/**< Index of the taxid in the taxon index, -1 if the taxid is deprecated.
+	 	 	 	 	 	 */
 } ecomerged_t;
 
 
+/**
+ * @brief Structure for a merged taxid index in a taxonomy structure.
+ *
+ * This index includes all deprecated taxids that now refer to different taxids, and
+ * the deprecated taxids that are deleted.
+ *
+ */
 typedef struct {
-	int32_t     count;
-	ecomerged_t merged[];
+	int32_t     count;		/**< Number of taxid/index pairs.
+ 	 	 	 	 	 	 	 */
+	ecomerged_t merged[];	/**< Array of taxid/index pairs.
+	 	 	 	 	 	 	 */
 } ecomergedidx_t;
 
 
+/**
+ * @brief Structure for a taxonomy.
+ */
 typedef struct OBIDMS_taxonomy_t {
-	char            tax_name[TAX_NAME_LEN];
-	OBIDMS_p        dms;
-	ecomergedidx_t* merged_idx;
-	ecorankidx_t*   ranks;
-	econameidx_t*   names;
-	econameidx_t*   preferred_names;
-	ecotxidx_t*     taxa;
+	char            tax_name[TAX_NAME_LEN];		/**< Taxonomy name.
+	 	 	 	 	 	 	 	 	 	 	 	 */
+	OBIDMS_p        dms;						/**< A pointer on the DMS to which the taxonomy belongs.
+ 	 	 	 	 	 	 	 	 	 	 	 	 */
+	ecomergedidx_t* merged_idx;					/**< Merged taxid index.
+	 	 	 	 	 	 	 	 	 	 	 	 */
+	ecorankidx_t*   ranks;						/**< Taxonomic ranks.
+	 	 	 	 	 	 	 	 	 	 	 	 */
+	econameidx_t*   names;						/**< Taxon names.
+ 	 	 	 	 	 	 	 	 	 	 	 	 */
+	econameidx_t*   preferred_names;			/**< Taxon preferred names (i.e. added locally).
+	 	 	 	 	 	 	 	 	 	 	 	 */
+	ecotxidx_t*     taxa;						/**< Taxa.
+	 	 	 	 	 	 	 	 	 	 	 	 */
 } OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
 
 
-OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
-
-int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
-
-ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
-
-ecotx_t* obi_taxo_get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
-ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
-
-bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
-
-ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
-
-ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
-
-ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
-
-ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
-
-ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
-
-int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
-
+/**
+ * @brief Function reading an NCBI taxdump and loading its information into a taxonomy structure.
+ *
+ * @param taxdump The path to the taxdump directory.
+ *
+ * @returns A pointer on the read taxonomy structure.
+ * @retval NULL if an error occurred.
+ *
+ * @since 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
 OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
 
+
+/**
+ * @brief Function reading a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files)
+ * 		  and loading its information into a taxonomy structure.
+ *
+ * @param dms A pointer on the DMS to which the taxonomy belongs.
+ * @param taxonomy_name The name (prefix) of the taxonomy.
+ * @param read_alternative_names A boolean indicating whether names other than scientific and preferred names should be read.
+ *
+ * @returns A pointer on the read taxonomy structure.
+ * @retval NULL if an error occurred.
+ *
+ * @since 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
+
+
+/**
+ * @brief Function writing a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files).
+ *
+ * @param dms A pointer on the DMS to which the taxonomy belongs.
+ * @param tax A pointer on the taxonomy structure.
+ * @param tax_name The name (prefix) of the taxonomy.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
+
+
+/**
+ * @brief Function closing a taxonomy structure.
+ *
+ * This function writes all changes to the binary files (local taxa and preferred names) and frees all the memory allocated for the structure.
+ *
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Function adding a local taxon to a taxonomy.
+ *
+ * @param tax A pointer on the taxonomy structure.
+ * @param name The taxon scientific name.
+ * @param rank_name The taxon rank name (it must match one of the rank labels of the taxonomy).
+ * @param parent_taxid The taxid of the parent node in the taxonomic tree.
+ * @param min_taxid The minimum taxid to give to the new taxon (the function will choose a new taxid >= min_taxid and >= MIN_LOCAL_TAXID).
+ *
+ * @returns The taxid given to the new local taxon.
+ * @retval A taxid >= MIN_LOCAL_TAXID on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
 int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
 
+
+/**
+ * @brief Function adding a preferred name to a taxon in a taxonomy, referred to by its taxid.
+ *
+ * @param tax A pointer on the taxonomy structure.
+ * @param taxid The taxid of the taxon that should have a new preferred name.
+ * @param preferred_name The new preferred name.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
 int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
 
+
+/**
+ * @brief Function adding a preferred name to a taxon in a taxonomy, referred to by the taxon pointer.
+ *
+ * @param tax A pointer on the taxonomy structure.
+ * @param taxon A pointer on the taxon that should have a new preferred name.
+ * @param preferred_name The new preferred name.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
 int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);
 
 
+/**
+ * @brief Function returning the parent of a taxon at a given rank.
+ *
+ * @param taxon A pointer on the taxon.
+ * @param rankidx The index of the rank wanted.
+ *
+ * @returns A pointer on the parent taxon at the wanted rank.
+ * @retval NULL if no parent taxon was found at the wanted rank.
+ */
+ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
+
+
+/**
+ * @brief Function returning a taxon given its taxid.
+ *
+ * @param taxonomy A pointer on the taxonomy.
+ * @param taxid The taxid of the taxon.
+ *
+ * @returns A pointer on the wanted taxon.
+ * @retval NULL if no taxon was found with the given taxid.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
+
+
+/**
+ * @brief Function checking whether a taxon is under another in the taxonomy tree.
+ *
+ * @param taxon A pointer on the first taxon.
+ * @param other_taxid The taxid of the second taxon.
+ *
+ * @returns A boolean indicating whether the first taxon is under the second taxon in the taxonomy tree.
+ */
+bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
+
+
+/**
+ * @brief Function returning the parent of a taxon at the species level.
+ *
+ * @param taxon A pointer on the taxon.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on the parent taxon at the species level.
+ * @retval NULL if no parent taxon was found at the wanted rank.
+ */
+ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Function returning the parent of a taxon at the genus level.
+ *
+ * @param taxon A pointer on the taxon.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on the parent taxon at the genus level.
+ * @retval NULL if no parent taxon was found at the wanted rank.
+ */
+ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Function returning the parent of a taxon at the family level.
+ *
+ * @param taxon A pointer on the taxon.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on the parent taxon at the family level.
+ * @retval NULL if no parent taxon was found at the wanted rank.
+ */
+ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Function returning the parent of a taxon at the kingdom level.
+ *
+ * @param taxon A pointer on the taxon.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on the parent taxon at the kingdom level.
+ * @retval NULL if no parent taxon was found at the wanted rank.
+ */
+ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
+
+
+/**
+ * @brief Function returning the parent of a taxon at the superkingdom level.
+ *
+ * @param taxon A pointer on the taxon.
+ * @param taxonomy A pointer on the taxonomy structure.
+ *
+ * @returns A pointer on the parent taxon at the superkingdom level.
+ * @retval NULL if no parent taxon was found at the wanted rank.
+ */
+ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
 

From 651c1d7845075673d77bba5142fec964d41dcf38 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Tue, 31 Jan 2017 16:45:47 +0100
Subject: [PATCH 18/22] utilities: bsearch and qsort with additional user_data
 pointer argument

---
 src/utils.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/utils.h |  37 ++++++++++
 2 files changed, 227 insertions(+)

diff --git a/src/utils.c b/src/utils.c
index f7a0ff5..37e5f0c 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -116,3 +116,193 @@ void* obi_get_memory_aligned_on_16(int size, int* shift)
 	return (memory);
 }
 
+
+/*
+ * A generic implementation of binary search for the Linux kernel
+ *
+ * Copyright (C) 2008-2009 Ksplice, Inc.
+ * Author: Tim Abbott <tabbott@ksplice.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2.
+ */
+// Binary search of key in a sorted array; cmp(key, element, user_data) must be consistent with the order of the array
+void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data,
+              	  	  	int (*cmp)(const void *key, const void *elt, const void* user_data))
+{
+	const char* first = (const char*) base;	// Byte pointer: arithmetic on void* is a GNU extension, not ISO C
+	size_t      start = 0;
+	size_t      end   = num;
+
+	// Invariant: a matching element, if there is one, lies in the half-open index range [start, end)
+	while (start < end)
+	{
+		size_t mid    = start + (end - start) / 2;
+		int    result = cmp(key, first + mid * size, user_data);
+		if (result < 0)
+			end = mid;
+		else if (result > 0)
+			start = mid + 1;
+		else
+			return (void*) (first + mid * size);
+	}
+	return NULL;
+}
+
+
+/*
+ * Copyright (c) 1992, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
+ */
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))	/* Smaller of a and b; the selected argument is evaluated twice, so avoid side effects */
+/* swapcode: exchange n bytes between parmi and parmj, one TYPE-sized unit at a time (at least one unit is always exchanged). */
+#define swapcode(TYPE, parmi, parmj, n) {               \
+        long i = (n) / sizeof (TYPE);                   \
+        register TYPE *pi = (TYPE *) (parmi);           \
+        register TYPE *pj = (TYPE *) (parmj);           \
+        do {                                            \
+                register TYPE   t = *pi;                \
+                *pi++ = *pj;                            \
+                *pj++ = t;                              \
+        } while (--i > 0);                              \
+}
+/* SWAPINIT: set swaptype to 2 if the address a or the size es is not a multiple of sizeof(long), to 0 if es == sizeof(long), to 1 otherwise. */
+#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
+        es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
+/* swapfunc: exchange n bytes between a and b, long by long when swaptype <= 1 and byte by byte otherwise. */
+static __inline void
+swapfunc(char *a, char *b, int n, int swaptype)
+{
+        if (swaptype <= 1)
+                swapcode(long, a, b, n)
+        else
+                swapcode(char, a, b, n)
+}
+/* swap: exchange two elements, with a single long assignment when swaptype == 0 and with swapfunc() on es bytes otherwise (uses the caller's es and swaptype). */
+#define swap(a, b)                                      \
+        if (swaptype == 0) {                            \
+                long t = *(long *)(a);                  \
+                *(long *)(a) = *(long *)(b);            \
+                *(long *)(b) = t;                       \
+        } else                                          \
+                swapfunc(a, b, es, swaptype)
+/* vecswap: exchange n bytes between a and b, doing nothing when n <= 0 (uses the caller's swaptype). */
+#define vecswap(a, b, n)        if ((n) > 0) swapfunc(a, b, n, swaptype)
+/* med3: return whichever of a, b and c is the median according to cmp (user_data is forwarded to cmp). */
+static __inline char *
+med3(char *a, char *b, char *c, const void *user_data, int (*cmp)(const void *, const void *, const void *))
+{
+        if (cmp(a, b, user_data) < 0)
+                return cmp(b, c, user_data) < 0 ? b : (cmp(a, c, user_data) < 0 ? c : a);
+        return cmp(b, c, user_data) > 0 ? b : (cmp(a, c, user_data) < 0 ? a : c);
+}
+/* qsort_user_data: sort in place the n elements of es bytes starting at aa, in ascending order according to cmp (which receives user_data as third argument). */
+void
+qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *))
+{
+        char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+        int d, r, swaptype, swap_cnt;
+        register char *a = aa;
+        /* NOTE(review): d and r hold byte counts in an int; this looks like it could overflow on arrays of more than INT_MAX bytes -- confirm expected sizes */
+loop:   SWAPINIT(a, es);
+        swap_cnt = 0;
+        if (n < 7) {    /* Small arrays are sorted with a plain insertion sort */
+                for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
+                        for (pl = pm; pl > (char *) a && cmp(pl - es, pl, user_data) > 0;
+                             pl -= es)
+                                swap(pl, pl - es);
+                return;
+        }
+        pm = (char *)a + (n / 2) * es;    /* Pivot candidate: the middle element */
+        if (n > 7) {    /* More than 7 elements: take the median of the first, middle and last elements */
+                pl = (char *)a;
+                pn = (char *)a + (n - 1) * es;
+                if (n > 40) {    /* More than 40 elements: each of the three candidates is itself a median of three */
+                        d = (n / 8) * es;
+                        pl = med3(pl, pl + d, pl + 2 * d, user_data, cmp);
+                        pm = med3(pm - d, pm, pm + d, user_data, cmp);
+                        pn = med3(pn - 2 * d, pn - d, pn, user_data, cmp);
+                }
+                pm = med3(pl, pm, pn, user_data, cmp);
+        }
+        swap(a, pm);    /* The pivot is moved to the first position */
+        pa = pb = (char *)a + es;
+        /* pb scans upwards and pc downwards; elements equal to the pivot are parked at the two ends (below pa and above pd) */
+        pc = pd = (char *)a + (n - 1) * es;
+        for (;;) {
+                while (pb <= pc && (r = cmp(pb, a, user_data)) <= 0) {
+                        if (r == 0) {
+                                swap_cnt = 1;
+                                swap(pa, pb);
+                                pa += es;
+                        }
+                        pb += es;
+                }
+                while (pb <= pc && (r = cmp(pc, a, user_data)) >= 0) {
+                        if (r == 0) {
+                                swap_cnt = 1;
+                                swap(pc, pd);
+                                pd -= es;
+                        }
+                        pc -= es;
+                }
+                if (pb > pc)
+                        break;
+                swap(pb, pc);
+                swap_cnt = 1;
+                pb += es;
+                pc -= es;
+        }
+        if (swap_cnt == 0) {  /* No swap was needed while partitioning: switch to insertion sort */
+                for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
+                        for (pl = pm; pl > (char *) a && cmp(pl - es, pl, user_data) > 0;
+                             pl -= es)
+                                swap(pl, pl - es);
+                return;
+        }
+        /* Bring the elements equal to the pivot back from the two ends to the middle of the array */
+        pn = (char *)a + n * es;
+        r = MIN(pa - (char *)a, pb - pa);
+        vecswap(a, pb - r, r);
+        r = MIN((long)(pd - pc), (long)(pn - pd - es));
+        vecswap(pb, pn - r, r);
+        if ((r = pb - pa) > (int)es)    /* Recurse on the elements that compared lower than the pivot */
+                qsort_user_data(a, r / es, es, user_data, cmp);
+        if ((r = pd - pc) > (int)es) {
+                /* Iterate rather than recurse to save stack space */
+                a = pn - r;
+                n = r / es;
+                goto loop;
+        }
+/*              qsort(pn - r, r / es, es, cmp);*/
+}
+
diff --git a/src/utils.h b/src/utils.h
index 8ac9a6c..ecab62e 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -74,4 +74,41 @@ char* obi_format_date(time_t date);
 void* obi_get_memory_aligned_on_16(int size, int* shift);
 
 
+/**
+ * @brief Version of binary search modified to allow the user to provide an
+ *        additional pointer sent to the comparison function.
+ *
+ * @param key This is the pointer to the object that serves as key for the search, type-casted as a void*.
+ * @param base This is the pointer to the first object of the array where the search is performed, type-casted as a void*.
+ * @param num This is the number of elements in the array pointed by base.
+ * @param size This is the size in bytes of each element in the array.
+ * @param user_data This is an additional pointer passed to the comparison function.
+ * @param cmp This is the function that compares the key with an element, optionally using the additional pointer.
+ *
+ * @returns A pointer to an entry in the array that matches the search key.
+ * @retval NULL if key is not found.
+ *
+ * @since January 2017
+ * @author original code modified by Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data,
+              	  	  	int (*cmp)(const void *key, const void *elt, const void* user_data));
+
+
+/**
+ * @brief Version of quick sort modified to allow the user to provide an
+ *        additional pointer sent to the comparison function.
+ *
+ * @param aa This is the pointer to the first element of the array to be sorted.
+ * @param n This is the number of elements in the array pointed to by aa.
+ * @param es This is the size in bytes of each element in the array.
+ * @param user_data This is an additional pointer passed to the comparison function.
+ * @param cmp This is the function that compares two elements, optionally using the additional pointer.
+ *
+ * @since January 2017
+ * @author original code modified by Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+void qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *));
+
+
 #endif /* UTILS_H_ */

From e50da64ea19629ea7cf150f051305805ec0b2da9 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Tue, 31 Jan 2017 16:48:06 +0100
Subject: [PATCH 19/22] The elements names when a column contains several
 elements per line are now formatted with '\0' as separator and handled in a
 more optimized way

---
 python/obitools3/commands/test.pyx            |   8 +-
 python/obitools3/obidms/_obidms.pyx           |  14 +-
 python/obitools3/obidms/capi/obidmscolumn.pxd |  29 +-
 src/obidmscolumn.c                            | 376 +++++++++++++++---
 src/obidmscolumn.h                            | 128 +++---
 src/obiview.c                                 |   8 +-
 6 files changed, 411 insertions(+), 152 deletions(-)

diff --git a/python/obitools3/commands/test.pyx b/python/obitools3/commands/test.pyx
index 774494a..6f001ce 100644
--- a/python/obitools3/commands/test.pyx
+++ b/python/obitools3/commands/test.pyx
@@ -97,8 +97,7 @@ def test_set_and_get(config, infos):
         return
     idx = random_int(config)
     value = infos['random_generator'][data_type](config)
-    
-    if len(element_names) > 1 :
+    if col.nb_elements_per_line > 1 :
         elt = random.choice(element_names)
         col[idx][elt] = value
         assert col[idx][elt] == value, "Set value != gotten value "+str(col[idx][elt])+" != "+str(value)
@@ -187,6 +186,7 @@ def create_random_column(config, infos) :
     elements_names = []
     for i in range(nb_elements_per_line) :
         elements_names.append(random_unique_element_name(config, infos))
+    elements_names = random.choice([None, elements_names])
     name = random_unique_name(infos)
     infos['view'].add_column(name, 
                              alias=alias, 
@@ -358,7 +358,9 @@ def run(config):
     config['test']['elt_name_max_len'] = int((COL_COMMENTS_MAX_LEN - config['test']['maxelts']) / config['test']['maxelts'])
 
     print("Initializing the DMS and the first view...")
-            
+    
+    shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
+
     ini_dms_and_first_view(config, infos)
     print_test(config, repr(infos['view']))
     
diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx
index 7f86c59..943b3e2 100644
--- a/python/obitools3/obidms/_obidms.pyx
+++ b/python/obitools3/obidms/_obidms.pyx
@@ -7,7 +7,8 @@ from .capi.obidms cimport obi_dms, \
                           
 from .capi.obidmscolumn cimport obi_close_column, \
                                 OBIDMS_column_p, \
-                                OBIDMS_column_header_p
+                                OBIDMS_column_header_p, \
+                                obi_get_elements_names
 
 from .capi.obiutils cimport obi_format_date
                    
@@ -75,7 +76,7 @@ from .capi.obiview cimport Obiview_p, \
                            DEFINITION_COLUMN, \
                            QUALITY_COLUMN
                            
-from libc.stdlib cimport malloc
+from libc.stdlib cimport malloc, free
 
 
 cdef class OBIDMS_column :
@@ -138,7 +139,12 @@ cdef class OBIDMS_column :
     # elements_names property getter
     @property
     def elements_names(self):
-        return (bytes2str(((self._pointer)[0].header).elements_names)).split(';')
+        cdef char* elts_names_b
+        cdef str   elts_names
+        elts_names_b = obi_get_elements_names((self._pointer)[0])
+        elts_names = bytes2str(elts_names_b)
+        free(<char*>elts_names_b)
+        return elts_names.split(';')
 
     # nb_elements_per_line property getter
     @property
@@ -376,7 +382,7 @@ cdef class OBIView :
             elements_names_b = str2bytes("")
         else :
             elements_names_b = str2bytes(';'.join(elements_names))
-    
+            
         if type :                       # TODO make C function that does that
             if type == 'OBI_INT' :
                 data_type = OBI_INT
diff --git a/python/obitools3/obidms/capi/obidmscolumn.pxd b/python/obitools3/obidms/capi/obidmscolumn.pxd
index b23b85f..9589421 100644
--- a/python/obitools3/obidms/capi/obidmscolumn.pxd
+++ b/python/obitools3/obidms/capi/obidmscolumn.pxd
@@ -47,31 +47,8 @@ cdef extern from "obidmscolumn.h" nogil:
         bint                   writable
         
     ctypedef OBIDMS_column_t* OBIDMS_column_p
-
-    OBIDMS_column_p obi_create_column(OBIDMS_p dms, 
-                                      const_char_p column_name, 
-                                      OBIType_t type, 
-                                      index_t nb_lines, 
-                                      index_t nb_elements_per_line, 
-                                      const_char_p elements_names,
-                                      const_char_p indexer_name,
-                                      const_char_p associated_colum_name,
-                                      obiversion_t associated_colum_version,
-                                      const_char_p comments)
-                
-    OBIDMS_column_p obi_open_column(OBIDMS_p dms, 
-                                    const_char_p column_name, 
-                                    obiversion_t version_number)
         
     int obi_close_column(OBIDMS_column_p column)
-        
-    OBIDMS_column_p obi_clone_column(OBIDMS_p dms, 
-                                     OBIDMS_column_p line_selection,
-                                     const_char_p column_name, 
-                                     obiversion_t version_number, 
-                                     bint clone_data)
-    
-    int obi_close_column(OBIDMS_column_p column)
     
     obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, 
                                                          const_char_p column_name)
@@ -81,9 +58,9 @@ cdef extern from "obidmscolumn.h" nogil:
                                                            obiversion_t version_number)
     
     int obi_close_header(OBIDMS_column_header_p header)
-
-    int obi_select(OBIDMS_column_p line_selection_column, index_t line_to_grep)
-
+    
+    char* obi_get_elements_names(OBIDMS_column_p column)
+    
 
 cdef extern from "obidmscolumn_int.h" nogil:
 
diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c
index a704ab1..00fbae1 100644
--- a/src/obidmscolumn.c
+++ b/src/obidmscolumn.c
@@ -119,7 +119,7 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
 
 /**
  * @brief Internal function building the default elements names of the lines of a
- *        column (i.e. "0;1;2;...;n").
+ *        column, with ';' as separator (i.e. "0;1;2;...;n\0").
  *
  * @warning The returned pointer has to be freed by the caller.
  *
@@ -134,12 +134,61 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
 static char* build_default_elements_names(index_t nb_elements_per_line);
 
 
+/**
+ * @brief Internal function formatting the elements names of the lines of a
+ *        column with '\0' as separator (e.g. "0\01\02\0...\0n\0").
+ *
+ * @param elements_names The character string formatted with ';' as separator (e.g. "0;1;2;...;n\0").
+ * @param elts_names_length A pointer on an integer where the function will store the length of the character string.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static void format_elements_names(char* elements_names, int* elts_names_length);
+
+
+/**
+ * @brief Internal function comparing two element names using their sorted index, using data stored in the column header.
+ *
+ * @param n1_sort_idx A pointer on the sorted index of the first name.
+ * @param n2_sort_idx A pointer on the sorted index of the second name.
+ * @param h A pointer on the column header.
+ *
+ * @returns A value < 0 if name1 < name2,
+ * 			a value > 0 if name1 > name2,
+ * 			and 0 if name1 == name2.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h);
+
+
+/**
+ * @brief Internal function comparing two element names using a pointer on the first name and the sorted index of the second name,
+ * 		  using data stored in the column header.
+ *
+ * @param name1 A pointer on the first name.
+ * @param n2_sort_idx A pointer on the sorted index of the second name.
+ * @param h A pointer on the column header.
+ *
+ * @returns A value < 0 if name1 < name2,
+ * 			a value > 0 if name1 > name2,
+ * 			and 0 if name1 == name2.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h);
+
+
 /**
  * @brief Internal function setting the elements names of the lines of a
  *        column in the header of the OBIDMS column structure.
  *
  * @param column A pointer as returned by obi_create_column().
- * @param elements_names The names of the elements with ';' as separator.
+ * @param elements_names The names of the elements as formatted by format_elements_names().
+ * @param elts_names_length The length of elements_names.
  *
  * @retval 0 if the operation was successfully completed.
  * @retval -1 if an error occurred.
@@ -147,7 +196,35 @@ static char* build_default_elements_names(index_t nb_elements_per_line);
  * @since July 2015
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-static int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names);
+static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length);
+
+
+/**
+ * @brief Internal function counting the number of elements names in a character array.
+ *
+ * @param elements_names A pointer on the character string corresponding to the elements names,
+ *                       formatted with ';' or with '\0' as separator.
+ * @param elt_names_formatted Whether the separator is ';' (false), or '\0' (true, as formatted by format_elements_names()).
+ *
+ * @returns The number of elements names in the character array.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted);
+
+
+/**
+ * @brief Internal function computing the length of a character array containing elements names as formatted by format_elements_names().
+ *
+ * @param elements_names A pointer on the character string corresponding to the elements names as formatted by format_elements_names().
+ *
+ * @returns The length of a character array.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int get_formatted_elt_names_length(const char* elements_names);
 
 
 /**
@@ -198,6 +275,7 @@ static char* build_column_file_name(const char* column_name, obiversion_t versio
 }
 
 
+
 static char* build_version_file_name(const char* column_name)
 {
 	char* file_name;
@@ -222,6 +300,7 @@ static char* build_version_file_name(const char* column_name)
 }
 
 
+
 static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block)
 {
 	off_t 			loc_size;
@@ -346,6 +425,7 @@ static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_
 }
 
 
+
 static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory)
 {
 	off_t 			loc_size;
@@ -437,10 +517,12 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
 }
 
 
+
 static char* build_default_elements_names(index_t nb_elements_per_line)
 {
 	char* elements_names;
 	int   i;
+	int   len;
 
 	elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
 	if (elements_names == NULL)
@@ -457,31 +539,169 @@ static char* build_default_elements_names(index_t nb_elements_per_line)
 		return NULL;
 	}
 
-	for (i= 0; i < nb_elements_per_line; i++)
-		sprintf(elements_names, "%d", i);
+	len = 0;
+	for (i = 0; i < nb_elements_per_line; i++)
+		len += sprintf(elements_names+len, "%d;", i);
 
 	// Terminal character
-	elements_names[strlen(elements_names)] = '\0';
+	elements_names[len-1] = '\0';	// -1 to delete last ';'
+	len--;
 
 	return elements_names;
 }
 
 
-int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names)
+
+static void format_elements_names(char* elements_names, int* elts_names_length)
 {
-	if (strlen(elements_names) > ELEMENTS_NAMES_MAX)
+	int i;
+
+	*elts_names_length = strlen(elements_names);
+
+	// Replace the ';' with '\0'
+	for (i=0; i < *elts_names_length; i++)
+	{
+		if (elements_names[i] == ';')
+			elements_names[i] = '\0';
+	}
+}
+
+
+
+static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h)
+{
+	char* name1=NULL;
+	char* name2=NULL;
+
+	int name1_idx;
+	int name2_idx;
+
+	int name1_sort_idx = *((int*)n1_sort_idx);
+	int name2_sort_idx = *((int*)n2_sort_idx);
+	OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
+
+	name1_idx = (header->elements_names_idx)[name1_sort_idx];
+	name1 = (header->elements_names)+name1_idx;
+
+	name2_idx = (header->elements_names_idx)[name2_sort_idx];
+	name2 = (header->elements_names)+name2_idx;
+
+	return strcmp(name1, name2);
+}
+
+
+
+static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h)
+{
+	char* name2=NULL;
+	int name2_idx;
+
+	int name2_sort_idx = *((int*)n2_sort_idx);
+	OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
+
+	name2_idx = (header->elements_names_idx)[name2_sort_idx];
+	name2 = (header->elements_names)+name2_idx;
+
+	return strcmp(name1, name2);
+}
+
+
+
+static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length)
+{
+	int i, j;
+
+	// Check that the elements names are not too long
+	if (elts_names_length+2 > ELEMENTS_NAMES_MAX)
 	{
 		obi_set_errno(OBICOL_UNKNOWN_ERROR);
 		obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX);
 		return -1;
 	}
 
-	strcpy((column->header)->elements_names, elements_names);
+	// Copy the elements names in the header
+	memcpy((column->header)->elements_names, elements_names, elts_names_length*sizeof(char));
+
+	// Terminal characters
+	(column->header)->elements_names[elts_names_length] = '\0';
+	(column->header)->elements_names[elts_names_length + 1] = '\0';
+
+	// Store the length of the character array containing the elements names
+	(column->header)->elements_names_length = elts_names_length;
+
+	// Build the elements names index
+	i = 0;
+	j = 0;
+	// Index the first element name
+	((column->header)->elements_names_idx)[j] = i;
+	((column->header)->sorted_elements_idx)[j] = j;
+	i++;
+	j++;
+	while (i < elts_names_length)
+	{
+		if (elements_names[i] == '\0')
+		{	// Index new element name
+			((column->header)->elements_names_idx)[j] = i+1;
+			((column->header)->sorted_elements_idx)[j] = j;
+			j++;
+		}
+		i++;
+	}
+
+	// Build the sorted index
+	qsort_user_data((column->header)->sorted_elements_idx, j, sizeof(int), column->header, cmp_elements_names_with_idx);
+
 	return 0;
 }
 
 
-index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
+
+static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted)
+{
+	char    sep;
+	int     i = 0;
+	bool    stop = false;
+	index_t count = 0;
+
+	if (elt_names_formatted)
+		sep = FORMATTED_ELT_NAMES_SEPARATOR;
+	else
+		sep = NOT_FORMATTED_ELT_NAMES_SEPARATOR;
+
+	while (! stop)
+	{
+		if ((elt_names_formatted && (elements_names[i] == '\0') && (elements_names[i+1] == '\0')) ||
+				((! elt_names_formatted) && (elements_names[i] == '\0')))
+			stop = true;
+		if ((elements_names[i] == sep) || (elements_names[i] == '\0'))
+			count++;
+		i++;
+	}
+
+	return count;
+}
+
+
+
+static int get_formatted_elt_names_length(const char* elements_names)
+{
+	int     i = 0;
+	bool    stop = false;
+
+	while (! stop)
+	{
+		if ((elements_names[i] == '\0') && (elements_names[i+1] == '\0'))
+			stop = true;
+		else
+			i++;
+	}
+
+	return i;
+}
+
+
+
+static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
 {
 	return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line);
 }
@@ -493,6 +713,7 @@ index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_lin
  *
  **********************************************************************/
 
+
 obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory)
 {
 	off_t 			loc_size;
@@ -557,6 +778,7 @@ obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_dire
 }
 
 
+
 obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name)
 {
 	OBIDMS_column_directory_p	column_directory;
@@ -582,6 +804,7 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c
 }
 
 
+
 size_t obi_get_platform_header_size()
 {
 	size_t header_size;
@@ -607,7 +830,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
 								  const char*  indexer_name,
 								  const char*  associated_column_name,
 								  obiversion_t associated_column_version,
-								  const char*  comments
+								  const char*  comments,
+								  bool		   elt_names_formatted
 								 )
 {
 	OBIDMS_column_p 			new_column;
@@ -623,6 +847,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
 	OBIType_t  					returned_data_type;
 	OBIType_t  					stored_data_type;
 	char*			    		final_indexer_name;
+	char*						built_elements_names = NULL;
+	int							elts_names_length;
 
 	new_column = NULL;
 
@@ -695,31 +921,29 @@ OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
 	else if (nb_lines < minimum_line_count)
 		nb_lines = minimum_line_count;
 
-	// Check and build if needed the element names
-	if ((elements_names == NULL) || (strcmp(elements_names, "") == 0))	// Build the default element names: str of the element index
+	// Check, format, and build if needed the element names
+	if ((elements_names == NULL) || (*elements_names == '\0'))	// Build the default element names: str of the element index
 	{
-		elements_names = build_default_elements_names(nb_elements_per_line);
-		if (elements_names == NULL)
+		built_elements_names = build_default_elements_names(nb_elements_per_line);
+		if (built_elements_names == NULL)
 			return NULL;
+		elements_names = built_elements_names;
 	}
-	else if (((elements_names == NULL) || (strcmp(elements_names, "") != 0)) && (nb_elements_per_line > 1))
+	else
 	{ // The number of elements names should be equal to the number of elements per line
-		char* token;
-		index_t n = 0;
-		token = strdup(elements_names);
-		token = strtok(token, ";");
-		while (token != NULL)
+		if (check_elt_names_count(elements_names, elt_names_formatted) != nb_elements_per_line)
 		{
-			token = strtok(NULL, ";");
-			n++;
-		}
-		if (n != nb_elements_per_line)
-		{
-			obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
+			obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line:"
+					"\n%lld elements per line\nelements names:%s\n", nb_elements_per_line, elements_names);
 			return NULL;
 		}
 	}
-	// TODO what if 1 element and name specified? doc
+
+	// Format the elements names string
+	if (! elt_names_formatted)
+		format_elements_names(elements_names, &elts_names_length);
+	else
+		elts_names_length = get_formatted_elt_names_length(elements_names);
 
 	// Calculate the size needed
 	header_size = obi_get_platform_header_size();
@@ -816,11 +1040,11 @@ OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
 	header->version       		          = version_number;
 	header->cloned_from    		          = -1;
 
-	obi_column_set_elements_names(new_column, elements_names);
+	set_elements_names(new_column, elements_names, elts_names_length);
 
 	// Free the element names if they were built
-	if ((elements_names == NULL) || (strcmp(elements_names, "") == 0))
-		free(elements_names);
+	if (built_elements_names != NULL)
+		free(built_elements_names);
 
 	strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME);
 
@@ -886,6 +1110,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
 }
 
 
+
 OBIDMS_column_p obi_open_column(OBIDMS_p     dms,
 								const char*  column_name,
 								obiversion_t version_number)
@@ -1043,6 +1268,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p     dms,
 }
 
 
+
 OBIDMS_column_p obi_clone_column(OBIDMS_p         dms,
 								 OBIDMS_column_p  line_selection,
 								 const char*      column_name,
@@ -1083,7 +1309,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p         dms,
 								   (column_to_clone->header)->indexer_name,
 								   ((column_to_clone->header)->associated_column).column_name,
 								   ((column_to_clone->header)->associated_column).version,
-								   (column_to_clone->header)->comments
+								   (column_to_clone->header)->comments,
+								   true
 								  );
 
 	if (new_column == NULL)
@@ -1097,6 +1324,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p         dms,
 		return NULL;
 	}
 
+
+
 	(new_column->header)->cloned_from = (column_to_clone->header)->version;
 
 	if (clone_data && (line_selection == NULL))
@@ -1137,6 +1366,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p         dms,
 }
 
 
+
 int obi_close_column(OBIDMS_column_p column)
 {
 	int     ret_val = 0;
@@ -1185,6 +1415,7 @@ int obi_close_column(OBIDMS_column_p column)
 }
 
 
+
 int obi_clone_column_indexer(OBIDMS_column_p column)
 {
 	char* new_indexer_name;
@@ -1208,6 +1439,7 @@ int obi_clone_column_indexer(OBIDMS_column_p column)
 }
 
 
+
 int obi_truncate_column(OBIDMS_column_p column)	// TODO is it necessary to unmap/remap?
 {
 	size_t  file_size;
@@ -1309,6 +1541,7 @@ int obi_truncate_column(OBIDMS_column_p column)	// TODO is it necessary to unmap
 }
 
 
+
 int obi_enlarge_column(OBIDMS_column_p column)
 {
 	size_t  file_size;
@@ -1363,7 +1596,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
 	header_size = (column->header)->header_size;
 	file_size = header_size + new_data_size;
 
-	// Enlarge the file // TODO isn't it possible that this makes the file "move"?
+	// Enlarge the file
 	if (ftruncate(column_file_descriptor, file_size) < 0)
 	{
 		obi_set_errno(OBICOL_UNKNOWN_ERROR);
@@ -1414,6 +1647,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
 }
 
 
+
 void obi_ini_to_NA_values(OBIDMS_column_p column,
 						  index_t first_line_nb,
 						  index_t nb_lines)
@@ -1479,6 +1713,7 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
 }
 
 
+
 OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number)
 {
 	OBIDMS_column_header_p 		header;
@@ -1562,6 +1797,7 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
 }
 
 
+
 int obi_close_header(OBIDMS_column_header_p header)
 {
 	if (munmap(header, header->header_size) < 0)
@@ -1574,47 +1810,56 @@ int obi_close_header(OBIDMS_column_header_p header)
 }
 
 
-// TODO to be rewritten in an optimized and safe way if possible
+
 index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
 {
-	char*   elements_names;
-	char*   name;
-	index_t element_index;
+	int* elt_names_idx;
 
-	elements_names = strdup((column->header)->elements_names);
-	if (elements_names == NULL)
-	{
-		obidebug(1, "\nError strdup-ing the elements names");
-		return OBIIdx_NA;
-	}
+	elt_names_idx = bsearch_user_data(element_name, (column->header)->sorted_elements_idx, (column->header)->nb_elements_per_line, sizeof(int), column->header, cmp_elements_names_with_name_and_idx);
 
-	element_index = 0;
+	if (elt_names_idx != NULL)
+		return (index_t)(*elt_names_idx);
 
-	name = strtok(elements_names, ";");	// TODO not thread safe, see strtok_r maybe
-	if (strcmp(element_name, name) == 0)
-	{
-		free(elements_names);
-		return element_index;
-	}
-	element_index++;
-
-	while (name != NULL)
-	{
-		name = strtok(NULL, ";");			// TODO not thread safe, see strtok_r maybe
-		if (strcmp(element_name, name) == 0)
-		{
-			free(elements_names);
-			return element_index;
-		}
-		element_index++;
-	}
-
-	obidebug(1, "\nCan't find an element name");
-	free(elements_names);
+	obi_set_errno(OBICOL_UNKNOWN_ERROR);
+	obidebug(1, "\nError: could not find element name %s", element_name);
 	return OBIIdx_NA;
 }
 
 
+// TODO doc, returns elements names with ; as separator (discuss maybe char**)
+char* obi_get_elements_names(OBIDMS_column_p column)
+{
+	char* elements_names;
+	int   i, j;
+	int   elt_idx;
+	int   len;
+
+	elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
+	if (elements_names == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for elements names");
+		return NULL;
+	}
+
+	j = 0;
+	for (i=0; i < (column->header)->nb_elements_per_line; i++)
+	{
+		elt_idx = ((column->header)->elements_names_idx)[i];
+		len = strlen(((column->header)->elements_names)+elt_idx);
+		memcpy(elements_names+j, ((column->header)->elements_names)+elt_idx, len*sizeof(char));
+		j = j + len;
+		elements_names[j] = ';';
+		j++;
+	}
+
+	elements_names[j - 1] = '\0';
+
+	return elements_names;
+}
+
+
+
 int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
 {
 	// Check if the column is read-only
@@ -1649,6 +1894,7 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
 }
 
 
+
 int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
 {
 	if ((line_nb+1) > ((column->header)->line_count))
diff --git a/src/obidmscolumn.h b/src/obidmscolumn.h
index 9e5e348..cef1f59 100644
--- a/src/obidmscolumn.h
+++ b/src/obidmscolumn.h
@@ -28,17 +28,21 @@
 #include "obiblob_indexer.h"
 
 
-#define ELEMENTS_NAMES_MAX (2048)     	  /**< The maximum length of the list of elements names.	// TODO Discuss
-                                	       */
-#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
-										   *   are used ("0;1;2;...;n"), considering ELEMENTS_NAMES_MAX.
-                                	   	   */
-#define COLUMN_GROWTH_FACTOR (2)	 	  /**< The growth factor when a column is enlarged.
-                                	   	   */
-#define MAXIMUM_LINE_COUNT (1000000000)   /**< The maximum line count for the data of a column. //TODO
-                                	       */
-#define COMMENTS_MAX_LENGTH (2048)        /**< The maximum length for comments.
- 	 	 	 	 	 	 	 	 	       */
+#define ELEMENTS_NAMES_MAX (2048)     	  		/**< The maximum length of the list of elements names.	// TODO Discuss
+                                	       	   	 */
+#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) 		/**< The maximum number of elements per line if the default element names
+										   	   	 *   are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX.
+										   	   	 */
+#define COLUMN_GROWTH_FACTOR (2)	 	  		/**< The growth factor when a column is enlarged.
+                                	   	   	   	 */
+#define MAXIMUM_LINE_COUNT (1000000000)   		/**< The maximum line count for the data of a column. //TODO
+                                	       	   	 */
+#define COMMENTS_MAX_LENGTH (2048)        		/**< The maximum length for comments.
+ 	 	 	 	 	 	 	 	 	       	   	 */
+#define FORMATTED_ELT_NAMES_SEPARATOR '\0'		/**< The separator between elements names once formatted by format_elements_names().
+ 	 	 	 	 	 	 	 	 	       	   	 */
+#define NOT_FORMATTED_ELT_NAMES_SEPARATOR ';'   /**< The separator between elements names before formatting (e.g. "0;1;2;...;n").
+ 	 	 	 	 	 	 	 	 	 	 	 	 */
 
 
 /**
@@ -56,42 +60,48 @@ typedef struct Column_reference {
  * @brief OBIDMS column header structure.
  */
 typedef struct OBIDMS_column_header {
-	size_t				header_size;		   				    /**< Size of the header in bytes.
-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
-	size_t				data_size;			   				    /**< Size of the data in bytes.
-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
-	index_t				line_count;							    /**< Number of lines of data allocated.
-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
-	index_t				lines_used;							    /**< Number of lines of data used.
-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
-	index_t				nb_elements_per_line;   			   	/**< Number of elements per line.
-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
-	char				elements_names[ELEMENTS_NAMES_MAX+1];	/**< Names of the line elements with ';' as separator
-																 *   (no terminal ';').
-															 	 *	 (default are the indices: "0;1;2;...;n").
-															 	 */
-	OBIType_t			returned_data_type;		    			/**< Type of the data that is returned when getting an
-															 	 *   element from the column.
-															 	 */
-	OBIType_t			stored_data_type;		    			/**< Type of the data that is actually stored in the data
-															 	 *   part of the column.
-															 	 */
-	time_t				creation_date;			    			/**< Date of creation of the file.
-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
-	obiversion_t		version;				   				/**< Version of the column.
-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
-	obiversion_t		cloned_from;			    			/**< Version of the column from which this column
-															 	 *   was cloned from (-1 if it was not created by cloning
-															 	 *   another column).
-															 	 */
-	char            	name[OBIDMS_COLUMN_MAX_NAME+1]; 	    /**< The column name as a NULL terminated string.
-	                                             	 	 	 	 */
-	char            	indexer_name[INDEXER_MAX_NAME+1]; 		/**< If there is one, the indexer name as a NULL terminated string.
-	                                             	 	 	 	 */
-	Column_reference_t 	associated_column;						/**< If there is one, the reference to the associated column.
-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
-	char 				comments[COMMENTS_MAX_LENGTH+1];		/**< Comments stored as a classical zero end C string.
-												 	 	 	 	 */
+	size_t				header_size;		   				    			/**< Size of the header in bytes.
+	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
+	size_t				data_size;			   				    			/**< Size of the data in bytes.
+	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
+	index_t				line_count;							    			/**< Number of lines of data allocated.
+	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
+	index_t				lines_used;							    			/**< Number of lines of data used.
+	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
+	index_t				nb_elements_per_line;   			   				/**< Number of elements per line.
+	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
+	char				elements_names[ELEMENTS_NAMES_MAX+1];				/**< Names of the line elements with '\0' as separator
+																 	 	 	 *   and '\0\0' as terminal flag.
+																 	 	 	 *	 (default are the indices: "0\01\02\0...\0n\0\0").
+																 	 	 	 */
+	int					elements_names_length;								/**< Length of the character array where the elements names are stored.
+																			 */
+	int  				elements_names_idx[NB_ELTS_MAX_IF_DEFAULT_NAME];	/**< Index for the start of each element name in elements_names.
+																			 */
+	int  				sorted_elements_idx[NB_ELTS_MAX_IF_DEFAULT_NAME];	/**< Index for the sorted element names in elements_names_idx.
+																			 */
+	OBIType_t			returned_data_type;		    						/**< Type of the data that is returned when getting an
+															 	 	 	 	 *   element from the column.
+															 	 	 	 	 */
+	OBIType_t			stored_data_type;		    						/**< Type of the data that is actually stored in the data
+															 	 	 	 	 *   part of the column.
+															 	 	 	 	 */
+	time_t				creation_date;			    						/**< Date of creation of the file.
+	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
+	obiversion_t		version;				   							/**< Version of the column.
+	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
+	obiversion_t		cloned_from;			    						/**< Version of the column from which this column
+															 	 	 	 	 *   was cloned from (-1 if it was not created by cloning
+															 	 	 	 	 *   another column).
+															 	 	 	 	 */
+	char            	name[OBIDMS_COLUMN_MAX_NAME+1]; 	    			/**< The column name as a NULL terminated string.
+	                                             	 	 	 	 	 	 	 */
+	char            	indexer_name[INDEXER_MAX_NAME+1]; 					/**< If there is one, the indexer name as a NULL terminated string.
+	                                             	 	 	 	 	 	 	 */
+	Column_reference_t 	associated_column;									/**< If there is one, the reference to the associated column.
+	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
+	char 				comments[COMMENTS_MAX_LENGTH+1];					/**< Comments stored as a classical zero end C string.
+												 	 	 	 	 	 	 	 */
 } OBIDMS_column_header_t, *OBIDMS_column_header_p;
 
 
@@ -184,12 +194,13 @@ size_t obi_get_platform_header_size();
  * @param nb_lines The number of lines to be stored.
  * @param nb_elements_per_line The number of elements per line.								// TODO talk about default values
  * @param elements_names The names of the elements with ';' as separator (no terminal ';'),
- *                       NULL or "" if the default names are to be used ("0;1;2;...;n").
+ *                       NULL or "" if the default names are to be used ("0\01\02\0...\0n").
  * @param indexer_name The name of the indexer if there is one associated with the column.
  *                     If NULL or "", the indexer name is set as the column name.
  * @param associated_column_name The name of the associated column if there is one.
  * @param associated_column_version The version of the associated column if there is one.
  * @param comments Optional comments associated with the column.
+ * @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()).
  *
  * @returns A pointer on the newly created column structure.
  * @retval NULL if an error occurred.
@@ -206,7 +217,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
 								  const char*  indexer_name,
 								  const char*  associated_column_name,
 								  obiversion_t associated_column_version,
-								  const char*  comments
+								  const char*  comments,
+								  bool		   elt_names_formatted
 								 );
 
 
@@ -353,7 +365,7 @@ int obi_close_header(OBIDMS_column_header_p header);
  * @param element_name The name of the element.
  *
  * @returns The index of the element in a line of the column.
- * @retval OBIIdx_NA if an error occurred.						// TODO not sure if this is "clean".
+ * @retval OBIIdx_NA if an error occurred.
  *
  * @since July 2015
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
@@ -361,6 +373,22 @@ int obi_close_header(OBIDMS_column_header_p header);
 index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
 
 
+/**
+ * @brief Recovers the elements names of the lines of a column, with ';' as separator (i.e. "0;1;2;...;n\0").
+ *
+ * @warning The returned pointer has to be freed by the caller.
+ *
+ * @param column A pointer on an OBIDMS column.
+ *
+ * @returns A pointer on a character array where the elements names are stored.
+ * @retval NULL if an error occurred.
+ *
+ * @since January 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+char* obi_get_elements_names(OBIDMS_column_p column);
+
+
 /**
  * @brief Prepares a column to set a value.
  *
diff --git a/src/obiview.c b/src/obiview.c
index e371ba8..a9b0a86 100644
--- a/src/obiview.c
+++ b/src/obiview.c
@@ -445,8 +445,8 @@ static char* build_obiview_file_name(const char* view_name)
 
 bool view_exists(OBIDMS_p dms, const char* view_name)
 {
-	struct dirent* 	dp;
-	char*			file_name;
+	struct dirent* dp;
+	char*		   file_name;
 
 	// Create file name
 	file_name = build_obiview_file_name(view_name);
@@ -1236,7 +1236,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
 		// If there is a new line selection, build it by combining it with the one from the view to clone if there is one
 		else if (line_selection != NULL)
 		{
-			view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL);
+			view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL, false);
 			if ((view->line_selection) == NULL)
 			{
 				obidebug(1, "\nError creating a column corresponding to a line selection");
@@ -1792,7 +1792,7 @@ int obi_view_add_column(Obiview_p    view,
 	// Open or create the column
 	if (create)
 	{	// Create column
-		column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, indexer_name, associated_column_name, associated_column_version, comments);
+		column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, indexer_name, associated_column_name, associated_column_version, comments, false);
 		if (column == NULL)
 		{
 			obidebug(1, "\nError creating a column to add to a view");

From 7e9932f488d2faddc8d3e81d99d58f743fef9cfc Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Tue, 7 Feb 2017 17:12:56 +0100
Subject: [PATCH 20/22] Fixed a C function declaration

---
 python/obitools3/obidms/capi/obialign.pxd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/obitools3/obidms/capi/obialign.pxd b/python/obitools3/obidms/capi/obialign.pxd
index e76cabe..c2280b9 100644
--- a/python/obitools3/obidms/capi/obialign.pxd
+++ b/python/obitools3/obidms/capi/obialign.pxd
@@ -18,7 +18,8 @@ cdef extern from "obi_align.h" nogil:
                                  double threshold, 
                                  bint normalize, 
                                  int reference, 
-                                 bint similarity_mode)
+                                 bint similarity_mode,
+                                 int thread_count)
 
 
     int obi_lcs_align_two_columns(OBIDMS_p dms,

From a9102620f5dc219b8363c684ffa5db22787b215a Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Tue, 7 Feb 2017 17:14:10 +0100
Subject: [PATCH 21/22] Fixed missing email address

---
 src/obi_align.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/obi_align.h b/src/obi_align.h
index 98da4da..059c528 100644
--- a/src/obi_align.h
+++ b/src/obi_align.h
@@ -4,7 +4,7 @@
 
 /**
  * @file obi_align.h
- * @author Celine Mercier
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
  * @date May 11th 2016
  * @brief Header file for the functions handling the LCS alignment of DNA sequences.
  */
@@ -77,7 +77,7 @@
  * 					the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
  *                  it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
  * @param normalize Whether the score should be normalized with the reference sequence length.
- * @param reference The reference length. 0: The alignement length; 1: The longest sequence's length; 2: The shortest sequence's length.
+ * @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
  * @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
  *
  * @returns A value indicating the success of the operation.

From e524041013b4c887f8db642d698a7fcbae9ec356 Mon Sep 17 00:00:00 2001
From: Celine Mercier <celine.mercier@metabarcoding.org>
Date: Tue, 7 Feb 2017 17:16:09 +0100
Subject: [PATCH 22/22] Views: Files for unfinished views now have the
 extension '.obiview_unfinished', renamed to '.obiview' when the view is
 finished.

---
 python/obitools3/obidms/_obidms.pyx      |   2 +-
 python/obitools3/obidms/capi/obiview.pxd |   8 +-
 src/obi_align.c                          |  16 +-
 src/obiview.c                            | 533 ++++++++++++++++-------
 src/obiview.h                            |  39 +-
 5 files changed, 384 insertions(+), 214 deletions(-)

diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx
index 943b3e2..51a0d5a 100644
--- a/python/obitools3/obidms/_obidms.pyx
+++ b/python/obitools3/obidms/_obidms.pyx
@@ -690,7 +690,7 @@ cdef class OBIDMS :
         cdef int                  i, j
         cdef str                  column_name
 
-        view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name))
+        view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name), True)
         view_infos_d = {}
         view_infos_d["name"] = bytes2str(view_infos_p.name)
         view_infos_d["comments"] = bytes2str(view_infos_p.comments)
diff --git a/python/obitools3/obidms/capi/obiview.pxd b/python/obitools3/obidms/capi/obiview.pxd
index 34521ca..792441b 100644
--- a/python/obitools3/obidms/capi/obiview.pxd
+++ b/python/obitools3/obidms/capi/obiview.pxd
@@ -68,7 +68,7 @@ cdef extern from "obiview.h" nogil:
 
     Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column)
               
-    Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
+    Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bint finished)
 
     int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
 
@@ -94,11 +94,7 @@ cdef extern from "obiview.h" nogil:
 
     OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)
 
-    int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
-
-    int obi_save_view(Obiview_p view)
-    
-    int obi_close_view(Obiview_p view)
+    int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)    
     
     int obi_save_and_close_view(Obiview_p view)
 
diff --git a/src/obi_align.c b/src/obi_align.c
index 3fa3678..07a389b 100644
--- a/src/obi_align.c
+++ b/src/obi_align.c
@@ -4,7 +4,7 @@
 
 /**
  * @file obi_align.c
- * @author Celine Mercier
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
  * @date May 4th 2016
  * @brief Functions handling LCS sequence alignments.
  */
@@ -31,10 +31,6 @@
 #define DEBUG_LEVEL 0	// TODO has to be defined somewhere else (cython compil flag?)
 
 
-// TODO
-// use openMP pragmas
-
-
 /**************************************************************************
  *
  * D E C L A R A T I O N   O F   T H E   P R I V A T E   F U N C T I O N S
@@ -618,12 +614,12 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
 	}
 
 	// Close views
-	if (obi_close_view(seq_view) < 0)
+	if (obi_save_and_close_view(seq_view) < 0)
 	{
 		obidebug(1, "\nError closing the input view after aligning");
 		return -1;
 	}
-	if (obi_close_view(output_view) < 0)
+	if (obi_save_and_close_view(output_view) < 0)
 	{
 		obidebug(1, "\nError closing the output view after aligning");
 		return -1;
@@ -963,19 +959,19 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
 	// Close views
 	if (seq2_view != seq1_view)
 	{
-		if (obi_close_view(seq2_view) < 0)
+		if (obi_save_and_close_view(seq2_view) < 0)
 		{
 			obidebug(1, "\nError closing the second input view after aligning");
 			return -1;
 		}
 	}
-	if (obi_close_view(seq1_view) < 0)
+	if (obi_save_and_close_view(seq1_view) < 0)
 	{
 		obidebug(1, "\nError closing the first input view after aligning");
 		return -1;
 	}
 
-	if (obi_close_view(output_view) < 0)
+	if (obi_save_and_close_view(output_view) < 0)
 	{
 		obidebug(1, "\nError closing the output view after aligning");
 		return -1;
diff --git a/src/obiview.c b/src/obiview.c
index a9b0a86..a8f67a2 100644
--- a/src/obiview.c
+++ b/src/obiview.c
@@ -47,7 +47,7 @@
 
 
 /**
- * Internal function building the file name where the informations about an obiview are stored.
+ * Internal function building the file name where the information about a finished, read-only obiview is stored.
  *
  * @warning The returned pointer has to be freed by the caller.
  *
@@ -63,7 +63,23 @@ static char* build_obiview_file_name(const char* view_name);
 
 
 /**
- * Internal function checking if a view with a given name already exists in a DMS.
+ * Internal function building the file name where the information about an unfinished, writable obiview is stored.
+ *
+ * @warning The returned pointer has to be freed by the caller.
+ *
+ * @param view_name The name of the view.
+ *
+ * @returns A pointer to the file name.
+ * @retval NULL if an error occurred.
+ *
+ * @since February 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static char* build_unfinished_obiview_file_name(const char* view_name);
+
+
+/**
+ * Internal function checking if a view (either finished or unfinished) with a given name already exists in a DMS.
  *
  * @param dms The DMS.
  * @param view_name The name of the view.
@@ -73,7 +89,7 @@ static char* build_obiview_file_name(const char* view_name);
  * @since September 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-bool view_exists(OBIDMS_p dms, const char* view_name);
+static bool view_exists(OBIDMS_p dms, const char* view_name);
 
 
 /**
@@ -84,7 +100,7 @@ bool view_exists(OBIDMS_p dms, const char* view_name);
  * @since June 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-size_t get_platform_view_file_size();
+static size_t get_platform_view_file_size();
 
 
 /**
@@ -99,7 +115,7 @@ size_t get_platform_view_file_size();
  * @since August 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int enlarge_view_file(Obiview_p view, size_t new_size);
+static int enlarge_view_file(Obiview_p view, size_t new_size);
 
 
 /**
@@ -117,7 +133,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size);
  * @since August 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int write_comments_to_view_file(Obiview_p view, const char* comments);
+static int write_comments_to_view_file(Obiview_p view, const char* comments);
 
 
 /**
@@ -134,7 +150,7 @@ int write_comments_to_view_file(Obiview_p view, const char* comments);
  * @since June 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int create_obiview_file(OBIDMS_p dms, const char* view_name);
+static int create_obiview_file(OBIDMS_p dms, const char* view_name);
 
 
 /**
@@ -156,7 +172,7 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name);
  * @since June 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-void update_column_refs(Obiview_p view);
+static void update_column_refs(Obiview_p view);
 
 
 /**
@@ -175,7 +191,7 @@ void update_column_refs(Obiview_p view);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int create_column_dict(Obiview_p view);
+static int create_column_dict(Obiview_p view);
 
 
 /**
@@ -194,7 +210,7 @@ int create_column_dict(Obiview_p view);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int update_column_dict(Obiview_p view);
+static int update_column_dict(Obiview_p view);
 
 
 /**
@@ -219,7 +235,7 @@ int update_column_dict(Obiview_p view);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int update_column_refs_and_dict(Obiview_p view);
+static int update_column_refs_and_dict(Obiview_p view);
 
 
 /**
@@ -239,7 +255,7 @@ int update_column_refs_and_dict(Obiview_p view);
  * @since February 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int update_lines(Obiview_p view, index_t line_count);
+static int update_lines(Obiview_p view, index_t line_count);
 
 
 /**
@@ -257,7 +273,71 @@ int update_lines(Obiview_p view, index_t line_count);
  * @since February 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
+static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
+
+
+/**
+ * @brief Saves a view, updating its information in the view file.
+ *
+ * @warning The view must be writable.
+ *
+ * @param view A pointer on the view.
+ *
+ * @returns A value indicating the success of the operation.
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ *
+ * @since February 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int save_view(Obiview_p view);
+
+
+/**
+ * @brief Renames a view file once the view is finished, replacing the '*.obiview_unfinished' extension with '*.obiview'.
+ *
+ * @param view A pointer on the view.
+ *
+ * @returns A value indicating the success of the operation.
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ *
+ * @since February 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int rename_finished_view(Obiview_p view);
+
+
+/**
+ * @brief Finishes a view: checks the predicates, saves all the information, renames the view file.
+ *
+ * @param view A pointer on the view.
+ *
+ * @returns A value indicating the success of the operation.
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ *
+ * @since February 2017
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int finish_view(Obiview_p view);
+
+/**
+ * @brief Closes an opened view.
+ *
+ * @warning Doesn't save the view.
+ *
+ * @param view A pointer on the view.
+ *
+ * @returns A value indicating the success of the operation.
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ *
+ * @see obi_save_and_close_view()
+ * @since February 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static int close_view(Obiview_p view);
 
 
 /**
@@ -276,7 +356,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
  * @since April 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p);
+static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p);
 
 
 /**
@@ -294,7 +374,7 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
  * @since April 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
+static int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
 
 
 /****** PREDICATE FUNCTIONS *******/
@@ -313,7 +393,7 @@ int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-char* view_has_nuc_sequence_column(Obiview_p view);
+static char* view_has_nuc_sequence_column(Obiview_p view);
 
 
 /**
@@ -330,7 +410,7 @@ char* view_has_nuc_sequence_column(Obiview_p view);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-char* view_has_quality_column(Obiview_p view);
+static char* view_has_quality_column(Obiview_p view);
 
 
 /**
@@ -347,7 +427,7 @@ char* view_has_quality_column(Obiview_p view);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-char* view_has_id_column(Obiview_p view);
+static char* view_has_id_column(Obiview_p view);
 
 
 /**
@@ -364,7 +444,7 @@ char* view_has_id_column(Obiview_p view);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-char* view_has_definition_column(Obiview_p view);
+static char* view_has_definition_column(Obiview_p view);
 
 
 /**
@@ -381,7 +461,7 @@ char* view_has_definition_column(Obiview_p view);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-char* view_check_qual_match_seqs(Obiview_p view);
+static char* view_check_qual_match_seqs(Obiview_p view);
 
 
 /**
@@ -396,7 +476,7 @@ char* view_check_qual_match_seqs(Obiview_p view);
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view));
+static char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view));
 
 
 /**
@@ -410,7 +490,7 @@ char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obivi
  * @since July 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-char* view_check_all_predicates(Obiview_p view);
+static char* view_check_all_predicates(Obiview_p view);
 
 
 /************************************************************************
@@ -443,11 +523,35 @@ static char* build_obiview_file_name(const char* view_name)
 }
 
 
-bool view_exists(OBIDMS_p dms, const char* view_name)
+static char* build_unfinished_obiview_file_name(const char* view_name)
+{
+	char* file_name;
+
+	// Build file name
+	file_name = (char*) malloc((strlen(view_name) + 19 + 1)*sizeof(char));
+	if (file_name == NULL)
+	{
+		obi_set_errno(OBI_MALLOC_ERROR);
+		obidebug(1, "\nError allocating memory for a view file name");
+		return NULL;
+	}
+	if (sprintf(file_name, "%s.obiview_unfinished", view_name) < 0)
+	{
+		obi_set_errno(OBIVIEW_ERROR);
+		obidebug(1, "\nProblem building an unfinished obiview file name");
+		return NULL;
+	}
+
+	return file_name;
+}
+
+
+static bool view_exists(OBIDMS_p dms, const char* view_name)
 {
 	struct dirent* dp;
 	char*		   file_name;
 
+	// Check finished views
 	// Create file name
 	file_name = build_obiview_file_name(view_name);
 	if (file_name == NULL)
@@ -458,13 +562,38 @@ bool view_exists(OBIDMS_p dms, const char* view_name)
 		if ((dp->d_name)[0] == '.')
 			continue;
 		if (strcmp(dp->d_name, file_name) == 0)
+		{
+			free(file_name);
 			return true;
+		}
 	}
+
+	free(file_name);
+
+	// Check unfinished views
+	// Create file name
+	file_name = build_unfinished_obiview_file_name(view_name);
+	if (file_name == NULL)
+		return -1;
+
+	while ((dp = readdir(dms->view_directory)) != NULL)
+	{
+		if ((dp->d_name)[0] == '.')
+			continue;
+		if (strcmp(dp->d_name, file_name) == 0)
+		{
+			free(file_name);
+			return true;
+		}
+	}
+
+	free(file_name);
+
 	return false;
 }
 
 
-size_t get_platform_view_file_size()
+static size_t get_platform_view_file_size()
 {
 	size_t obiview_size;
 	size_t rounded_obiview_size;
@@ -480,7 +609,7 @@ size_t get_platform_view_file_size()
 }
 
 
-int enlarge_view_file(Obiview_p view, size_t new_size)
+static int enlarge_view_file(Obiview_p view, size_t new_size)
 {
 	int    obiview_file_descriptor;
 	double multiple;
@@ -488,7 +617,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size)
 	char*  file_name;
 
 	// Create file name
-	file_name = build_obiview_file_name((view->infos)->name);
+	file_name = build_unfinished_obiview_file_name((view->infos)->name);
 	if (file_name == NULL)
 		return -1;
 
@@ -556,7 +685,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size)
 }
 
 
-int write_comments_to_view_file(Obiview_p view, const char* comments)
+static int write_comments_to_view_file(Obiview_p view, const char* comments)
 {
 	size_t new_size;
 
@@ -580,14 +709,14 @@ int write_comments_to_view_file(Obiview_p view, const char* comments)
 }
 
 
-int create_obiview_file(OBIDMS_p dms, const char* view_name)
+static int create_obiview_file(OBIDMS_p dms, const char* view_name)
 {
 	char* 				file_name;
 	int 				obiview_file_descriptor;
 	size_t  			file_size;
 
 	// Create file name
-	file_name = build_obiview_file_name(view_name);
+	file_name = build_unfinished_obiview_file_name(view_name);
 	if (file_name == NULL)
 		return -1;
 
@@ -634,7 +763,7 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name)
 }
 
 
-void update_column_refs(Obiview_p view)
+static void update_column_refs(Obiview_p view)
 {
 	int i;
 
@@ -646,7 +775,7 @@ void update_column_refs(Obiview_p view)
 }
 
 
-int create_column_dict(Obiview_p view)
+static int create_column_dict(Obiview_p view)
 {
 	int i;
 
@@ -681,7 +810,7 @@ int create_column_dict(Obiview_p view)
 }
 
 
-int update_column_dict(Obiview_p view)
+static int update_column_dict(Obiview_p view)
 {
 	// Re-initialize the dictionary to rebuild it from scratch
 	ht_free(view->column_dict);
@@ -693,14 +822,14 @@ int update_column_dict(Obiview_p view)
 }
 
 
-int update_column_refs_and_dict(Obiview_p view)
+static int update_column_refs_and_dict(Obiview_p view)
 {
 	update_column_refs(view);
 	return update_column_dict(view);
 }
 
 
-int update_lines(Obiview_p view, index_t line_count)
+static int update_lines(Obiview_p view, index_t line_count)
 {
 	int 			i;
 
@@ -739,7 +868,7 @@ int update_lines(Obiview_p view, index_t line_count)
 }
 
 
-OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
+static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
 {
 	int i;
 	OBIDMS_column_p column = NULL;
@@ -799,7 +928,161 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
 }
 
 
-int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
+static int save_view(Obiview_p view)
+{
+	// Check that the view is not read-only
+	if (view->read_only)
+	{
+		obi_set_errno(OBIVIEW_ERROR);
+		obidebug(1, "\nError trying to save a read-only view");
+		return -1;
+	}
+
+	// Store reference for the line selection associated with that view if there is one
+	if (view->line_selection != NULL)		// Unnecessary in theory, the line selection references are already saved
+	{
+		strcpy(((view->infos)->line_selection).column_name, ((view->line_selection)->header)->name);
+		((view->infos)->line_selection).version = ((view->line_selection)->header)->version;
+		(view->infos)->all_lines = false;
+	}
+	else	// Necessary because line selection could have been deleted if a column was cloned
+	{
+		(((view->infos)->line_selection).column_name)[0] = '\0';
+		((view->infos)->line_selection).version = -1;
+		(view->infos)->all_lines = true;
+	}
+
+	update_column_refs(view);
+
+	return 0;
+}
+
+
+static int rename_finished_view(Obiview_p view)
+{
+	char* old_name;
+	char* new_name;
+	char* path_old_name;
+	char* path_new_name;
+	char* full_path_old_name;
+	char* full_path_new_name;
+
+	old_name = build_unfinished_obiview_file_name((view->infos)->name);
+	new_name = build_obiview_file_name((view->infos)->name);
+
+	path_old_name = malloc(MAX_PATH_LEN);
+	path_new_name = malloc(MAX_PATH_LEN);
+
+	strcpy(path_old_name, "VIEWS/");
+	strcat(path_old_name, old_name);
+
+	strcpy(path_new_name, "VIEWS/");
+	strcat(path_new_name, new_name);
+
+	full_path_old_name = obi_dms_get_full_path(view->dms, path_old_name);
+	full_path_new_name = obi_dms_get_full_path(view->dms, path_new_name);
+
+	if (rename(full_path_old_name, full_path_new_name) < 0)
+	{
+		obi_set_errno(OBIVIEW_ERROR);
+		obidebug(1, "\nError renaming the file of a finished view: %s", full_path_new_name);
+		free(old_name);
+		free(new_name);
+		return -1;
+	}
+
+	free(old_name);
+	free(new_name);
+	free(path_new_name);
+	free(path_old_name);
+	free(full_path_old_name);
+	free(full_path_new_name);
+
+	return 0;
+}
+
+
+static int finish_view(Obiview_p view)
+{
+	char* predicates;
+
+	// Check that the view is not read-only
+	if (view->read_only)
+	{
+		obi_set_errno(OBIVIEW_ERROR);
+		obidebug(1, "\nError trying to save a read-only view");
+		return -1;
+	}
+
+	// Check predicates
+	predicates = view_check_all_predicates(view);
+	if (predicates == NULL)
+	{
+		obidebug(1, "\nView predicates not respected");
+		return -1;	// TODO reverse view (delete files)
+	}
+	else
+	{
+		write_comments_to_view_file(view, predicates);
+		free(predicates);
+	}
+
+	if (save_view(view) < 0)
+		return -1;
+
+	if (rename_finished_view(view) < 0)
+		return -1;
+
+	// Flag the view as finished
+	(view->infos)->finished = true;
+
+	return 0;
+}
+
+
+static int close_view(Obiview_p view)
+{
+	int i;
+	int ret_value;
+
+	ret_value = 0;
+
+	for (i=0; i < ((view->infos)->column_count); i++)
+	{
+		if (obi_close_column((view->columns)[i]) < 0)
+		{
+			obidebug(1, "\nError closing a column while closing a view");
+			ret_value = -1;
+		}
+	}
+
+	// Close line selection if there is one
+	if (view->line_selection != NULL)
+	{
+		if (obi_close_column(view->line_selection) < 0)
+		{
+			obidebug(1, "\nError closing a line selection while closing a view");
+			ret_value = -1;
+		}
+	}
+
+	// Free the column dictionary
+	ht_free(view->column_dict);
+
+	// Unmap view file
+	if (obi_view_unmap_file(view->dms, view->infos) < 0)
+	{
+		obidebug(1, "\nError unmaping a view file while closing a view");
+		ret_value = -1;
+	}
+
+	free(view);
+
+	return ret_value;
+}
+
+
+static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
 {
 	int   i;
 	char* column_name = NULL;
@@ -846,7 +1129,7 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
 }
 
 
-int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
+static int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
 {
 	if (((*line_nb_p)+1) > ((view->infos)->line_count))
 	{
@@ -865,7 +1148,7 @@ int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
 
 /****** PREDICATE FUNCTIONS *******/
 
-char* view_has_nuc_sequence_column(Obiview_p view)
+static char* view_has_nuc_sequence_column(Obiview_p view)
 {
 	char* predicate;
 
@@ -889,7 +1172,7 @@ char* view_has_nuc_sequence_column(Obiview_p view)
 }
 
 
-char* view_has_quality_column(Obiview_p view)
+static char* view_has_quality_column(Obiview_p view)
 {
 	char* predicate;
 
@@ -913,7 +1196,7 @@ char* view_has_quality_column(Obiview_p view)
 }
 
 
-char* view_has_id_column(Obiview_p view)
+static char* view_has_id_column(Obiview_p view)
 {
 	char* predicate;
 
@@ -936,7 +1219,8 @@ char* view_has_id_column(Obiview_p view)
 	}
 }
 
-char* view_has_definition_column(Obiview_p view)
+
+static char* view_has_definition_column(Obiview_p view)
 {
 	char* predicate;
 
@@ -960,7 +1244,7 @@ char* view_has_definition_column(Obiview_p view)
 }
 
 
-char* view_check_qual_match_seqs(Obiview_p view)
+static char* view_check_qual_match_seqs(Obiview_p view)
 {
 	index_t 		i, j, k;
 	index_t			nb_elements_per_line;
@@ -1053,13 +1337,13 @@ char* view_check_qual_match_seqs(Obiview_p view)
 }
 
 
-char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view))
+static char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view))
 {
 	return predicate_function(view);
 }
 
 
-char* view_check_all_predicates(Obiview_p view)
+static char* view_check_all_predicates(Obiview_p view)
 {
 	int    i, j;
 	size_t size_to_allocate;
@@ -1195,7 +1479,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
 	}
 
 	// Map view file
-	view->infos = obi_view_map_file(dms, view_name);
+	view->infos = obi_view_map_file(dms, view_name, false);
 	if (view->infos == NULL)
 	{
 		obidebug(1, "\nError mapping the informations of a new view");
@@ -1305,7 +1589,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
 		if (write_comments_to_view_file(view, clone_comment) < 0)
 		{
 			obidebug(1, "\nError writing comments when creating a view");
-			obi_close_view(view);
+			close_view(view);
 			return NULL;
 		}
 	}
@@ -1341,7 +1625,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
 	if (write_comments_to_view_file(view, comments) < 0)
 	{
 		obidebug(1, "\nError writing comments when creating a view");
-		obi_close_view(view);
+		close_view(view);
 		return NULL;
 	}
 
@@ -1360,7 +1644,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
 	// Create the column dictionary (hash table) associating column names (or aliases) to column pointers
 	if (create_column_dict(view) < 0)
 	{
-		obi_close_view(view);
+		close_view(view);
 		return NULL;
 	}
 
@@ -1409,7 +1693,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
 		return NULL;
 	view = obi_new_view(dms, view_name, view_to_clone, line_selection, comments);
 
-	obi_close_view(view_to_clone);
+	close_view(view_to_clone);
 
 	return view;
 }
@@ -1511,26 +1795,43 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
 		return NULL;
 	view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column);
 
-	obi_close_view(view_to_clone);
+	close_view(view_to_clone);
 
 	return view;
 }
 
 
-Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
+Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bool finished)
 {
 	char*				file_name;
 	Obiview_infos_p		view_infos;
 	int 				obiview_file_descriptor;
 	size_t  			file_size;
+	int 				open_flag;
+	int					mmap_flag;
 
 	// Create file name
-	file_name = build_obiview_file_name(view_name);
+	if (finished)
+		file_name = build_obiview_file_name(view_name);
+	else
+		file_name = build_unfinished_obiview_file_name(view_name);
 	if (file_name == NULL)
 		return NULL;
 
+	// Set flags (read-only or not)
+	if (finished)
+	{
+		open_flag = O_RDONLY;
+		mmap_flag = PROT_READ;
+	}
+	else
+	{
+		open_flag = O_RDWR;
+		mmap_flag = PROT_READ | PROT_WRITE;
+	}
+
     // Open view file
-	obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDWR, 0777);
+	obiview_file_descriptor = openat(dms->view_dir_fd, file_name, open_flag, 0777);
 	if (obiview_file_descriptor < 0)
 	{
 		if (errno == ENOENT)
@@ -1560,7 +1861,7 @@ Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
 	// Map the view infos structure
 	view_infos = mmap(NULL,
 				 	  file_size,
-					  PROT_READ | PROT_WRITE,
+					  mmap_flag,
 					  MAP_SHARED,
 					  obiview_file_descriptor,
 					  0
@@ -1585,17 +1886,20 @@ Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
 
 int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
 {
-	char* 				file_name;
-	int 				obiview_file_descriptor;
-	size_t  			file_size;
+	char* 	file_name;
+	int 	obiview_file_descriptor;
+	size_t  file_size;
 
 	// Get file name
-	file_name = build_obiview_file_name(view_infos->name);
+	if (view_infos->finished)
+		file_name = build_obiview_file_name(view_infos->name);
+	else
+		file_name = build_unfinished_obiview_file_name(view_infos->name);
 	if (file_name == NULL)
 		return -1;
 
 	// Open view file
-	obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDWR, 0777);
+	obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDONLY, 0777);
 	if (obiview_file_descriptor < 0)
 	{
 		obi_set_errno(OBIVIEW_ERROR);
@@ -1661,13 +1965,9 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
 	}
 
 	// Map view file
-	view->infos = obi_view_map_file(dms, view_name);
-
-	// Check that the view is finished and can be opened
-	if ((view->infos)->finished == false)
+	view->infos = obi_view_map_file(dms, view_name, true);
+	if ((view->infos) == NULL)
 	{
-		obidebug(1, "\nError opening a view: the view is not finished");
-		obi_view_unmap_file(view->dms, view->infos);
 		free(view);
 		return NULL;
 	}
@@ -1697,7 +1997,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
 		if (column_pointer == NULL)
 		{
 			obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, column_name, column_version);
-			obi_close_view(view);
+			close_view(view);
 			return NULL;
 		}
 		(view->columns)[i] = column_pointer;
@@ -1713,7 +2013,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
 	if (create_column_dict(view) < 0)
 	{
 		obidebug(1, "\nError creating the column dictionary when opening a view");
-		obi_close_view(view);
+		close_view(view);
 		return NULL;
 	}
 
@@ -1963,103 +2263,16 @@ int obi_view_create_column_alias(Obiview_p view, const char* current_name, const
 }
 
 
-int obi_save_view(Obiview_p view)
-{
-	// Check that the view is not read-only
-	if (view->read_only)
-	{
-		obi_set_errno(OBIVIEW_ERROR);
-		obidebug(1, "\nError trying to save a read-only view");
-		return -1;
-	}
-
-	// Store reference for the line selection associated with that view if there is one
-	if (view->line_selection != NULL)		// Unnecessary in theory, the line selection references are already saved
-	{
-		strcpy(((view->infos)->line_selection).column_name, ((view->line_selection)->header)->name);
-		((view->infos)->line_selection).version = ((view->line_selection)->header)->version;
-		(view->infos)->all_lines = false;
-	}
-	else	// Necessary because line selection could have been deleted if a column was cloned
-	{
-		(((view->infos)->line_selection).column_name)[0] = '\0';
-		((view->infos)->line_selection).version = -1;
-		(view->infos)->all_lines = true;
-	}
-
-	update_column_refs(view);
-
-	return 0;
-}
-
-
-int obi_close_view(Obiview_p view)
-{
-	int i;
-	int ret_value;
-
-	ret_value = 0;
-
-	for (i=0; i < ((view->infos)->column_count); i++)
-	{
-		if (obi_close_column((view->columns)[i]) < 0)
-		{
-			obidebug(1, "\nError closing a column while closing a view");
-			ret_value = -1;
-		}
-	}
-
-	// Close line selection if there is one
-	if (view->line_selection != NULL)
-	{
-		if (obi_close_column(view->line_selection) < 0)
-		{
-			obidebug(1, "\nError closing a line selection while closing a view");
-			ret_value = -1;
-		}
-	}
-
-	// Flag the view as finished
-	(view->infos)->finished = true;
-
-	// Free the column dictionary
-	ht_free(view->column_dict);
-
-	// Unmap view file
-	if (obi_view_unmap_file(view->dms, view->infos) < 0)
-	{
-		obidebug(1, "\nError unmaping a view file while closing a view");
-		ret_value = -1;
-	}
-
-	free(view);
-
-	return ret_value;
-}
-
-
 int obi_save_and_close_view(Obiview_p view)
 {
-	char* predicates;
-
-	if (!(view->read_only))
-	{
-		predicates = view_check_all_predicates(view);
-		if (predicates == NULL)
-		{
-			obidebug(1, "\nView predicates not respected");
-			return -1;	// TODO reverse view (delete files)
-		}
-		else
-		{
-			write_comments_to_view_file(view, predicates);
-			free(predicates);
-		}
-		if (obi_save_view(view) < 0)
+	// Finish and save the view if it is not read-only
+	if ( ! (view->read_only))
+		if (finish_view(view) < 0)
 			return -1;
-	}
-	if (obi_close_view(view) < 0)
+
+	if (close_view(view) < 0)
 		return -1;
+
 	return 0;
 }
 
diff --git a/src/obiview.h b/src/obiview.h
index 641b856..f2750e9 100644
--- a/src/obiview.h
+++ b/src/obiview.h
@@ -242,6 +242,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
  *
  * @param dms A pointer on the OBIDMS.
  * @param view_name The unique name identifying the view.
+ * @param finished Whether the view is finished or not.
  *
  * @returns A pointer on the mapped view infos structure.
  * @retval NULL if an error occurred.
@@ -249,7 +250,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
  * @since June 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
  */
-Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name);
+Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bool finished);
 
 
 /**
@@ -444,42 +445,6 @@ int obi_select_line(Obiview_p view, index_t line_nb);
 int obi_select_lines(Obiview_p view, index_t* line_nbs);
 
 
-/**
- * @brief Saves a view, writing it in the view file.
- *
- * The view is written at the end of the view file, following the latest written view.
- *
- * @warning The view must be writable.
- *
- * @param view A pointer on the view.
- *
- * @returns A value indicating the success of the operation.
- * @retval 0 if the operation was successfully completed.
- * @retval -1 if an error occurred.
- *
- * @since February 2016
- * @author Celine Mercier (celine.mercier@metabarcoding.org)
- */
-int obi_save_view(Obiview_p view);
-
-
-/**
- * @brief Closes an opened view.
- *
- * @warning Uses obi_save_and_close_view() to automatically save the view if it's not already saved in the view file.
- *
- * @param view A pointer on the view.
- *
- * @returns A value indicating the success of the operation.
- * @retval 0 if the operation was successfully completed.
- * @retval -1 if an error occurred.
- *
- * @since February 2016
- * @author Celine Mercier (celine.mercier@metabarcoding.org)
- */
-int obi_close_view(Obiview_p view);
-
-
 /**
  * @brief Closes an opened view, and saves it if it is not read-only (meaning it is not already saved in the view file).
  *