New column type for DNA sequences. Only for those coded on 2 bits (only

'ATGCatgc') for now.
2015-11-19 18:12:48 +01:00
parent e371248567
commit 6ab1c83302
17 changed files with 860 additions and 29 deletions
--- a/src/encode.c
+++ b/src/encode.c
@ -0,0 +1,180 @@
+/****************************************************************************
+ * Encoding functions                                                       *
+ ****************************************************************************/
+
+/**
+ * @file encode.c
+ * @author Celine Mercier
+ * @date November 18th 2015
+ * @brief Functions encoding DNA sequences.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <math.h>
+
+#include "encode.h"
+#include "obiarray.h"
+#include "obidebug.h"
+
+
+#define DEBUG_LEVEL 0	// TODO has to be defined somewhere else (cython compil flag?)
+
+
+// TODO: endianness problem?
+
+
+
+/*
+ * Tells whether a NUL-terminated character string is made exclusively of
+ * the eight characters 'A', 'T', 'G', 'C', 'a', 't', 'g' and 'c'.
+ *
+ * Returns 1 when every character qualifies (an empty string qualifies too),
+ * and 0 as soon as any other character is met.
+ */
+bool only_ATGC(char* seq)
+{
+	char* cursor;
+
+	for (cursor = seq; *cursor != '\0'; cursor++)
+	{
+		switch (*cursor)
+		{
+		case 'A': case 'T': case 'G': case 'C':
+		case 'a': case 't': case 'g': case 'c':
+			// Accepted nucleotide, keep scanning
+			break;
+		default:
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+
+/*
+ * Packs a DNA sequence into a newly allocated byte buffer, with 2 bits per
+ * nucleotide and 4 nucleotides per byte. In each byte, the 1st nucleotide
+ * occupies the two most significant bits and the 4th one the two least
+ * significant bits. Unused trailing bits of the last byte are left at 0.
+ *
+ * seq    : the sequence to encode; only [atgcATGC] characters are accepted.
+ * length : the number of nucleotides to read from seq.
+ *
+ * Returns the encoded buffer (to be freed by the caller), or NULL if seq is
+ * NULL, if length is negative, if memory can not be allocated or if an
+ * invalid character is met.
+ */
+byte_t* encode_seq_on_2_bits(char* seq, int32_t length)		// TODO shift = 2
+{
+	byte_t*  seq_b;
+	uint8_t  nuc;
+	uint8_t  shift;
+	int32_t  length_b;
+	int32_t  i;
+
+	if ((seq == NULL) || (length < 0))
+	{
+		obidebug(1, "\nInvalid arguments when encoding a sequence");
+		return NULL;
+	}
+
+	// Number of bytes needed: length/4 rounded up, computed on integers only
+	length_b = length / 4 + ((length % 4) != 0);
+
+	// calloc() returns zeroed memory, so the codes only have to be OR-ed in.
+	// At least 1 byte is requested so that an empty sequence can not be
+	// mistaken for an allocation failure.
+	seq_b = (byte_t*) calloc((size_t) ((length_b > 0) ? length_b : 1), sizeof(byte_t));
+	if (seq_b == NULL)
+	{
+		obidebug(1, "\nError allocating memory when encoding a sequence");
+		return NULL;
+	}
+
+	for (i=0; i<length; i++)
+	{
+		switch (seq[i])
+		{
+		case 'a':
+		case 'A':
+			nuc = NUC_A;
+			break;
+		case 'c':
+		case 'C':
+			nuc = NUC_C;
+			break;
+		case 'g':
+		case 'G':
+			nuc = NUC_G;
+			break;
+		case 't':
+		case 'T':
+			nuc = NUC_T;
+			break;
+		default:
+			obidebug(1, "\nInvalid nucleotide base when encoding (not [atgcATGC])");
+			free(seq_b);	// do not leak the partially filled buffer
+			return NULL;
+		}
+
+		// Nucleotide i of a group of 4 is stored at bits (7-2i, 6-2i) of its byte
+		shift = 6 - 2*(i%4);
+		seq_b[i/4] |= nuc << shift;
+	}
+
+	return seq_b;
+}
+
+
+/*
+ * Unpacks a DNA sequence coded on 2 bits per nucleotide (4 nucleotides per
+ * byte, the 1st one in the two most significant bits) into a newly
+ * allocated, NUL-terminated, lower case character string.
+ *
+ * seq_b      : the encoded sequence.
+ * length_seq : the number of nucleotides to decode.
+ *
+ * Returns the decoded sequence (to be freed by the caller), or NULL if
+ * seq_b is NULL, if length_seq is negative or if memory can not be
+ * allocated.
+ */
+char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
+{
+	char*    seq;
+	int32_t  i;
+	uint8_t  shift;
+	uint8_t  nuc;
+
+	// A negative length would make the final '\0' be written out of bounds
+	if ((seq_b == NULL) || (length_seq < 0))
+	{
+		obidebug(1, "\nInvalid arguments when decoding a sequence");
+		return NULL;
+	}
+
+	seq = (char*) malloc(((size_t) length_seq + 1) * sizeof(char));
+	if (seq == NULL)
+	{
+		obidebug(1, "\nError allocating memory when decoding a sequence");
+		return NULL;
+	}
+
+	for (i=0; i<length_seq; i++)
+	{
+		// Bring the 2 bits of nucleotide i down to bits 1-0 and isolate them;
+		// the byte is read as unsigned so that the shift never drags a sign bit
+		shift = 6 - 2*(i % 4);
+		nuc = (((uint8_t) seq_b[i/4]) >> shift) & NUC_MASK;
+
+		switch (nuc)
+		{
+		case NUC_A:
+			seq[i] = 'a';
+			break;
+		case NUC_C:
+			seq[i] = 'c';
+			break;
+		case NUC_G:
+			seq[i] = 'g';
+			break;
+		case NUC_T:
+			seq[i] = 't';
+			break;
+		default:
+			obidebug(1, "\nInvalid nucleotide base when decoding");
+			free(seq);	// do not leak the partially decoded string
+			return NULL;
+		}
+	}
+
+	seq[length_seq] = '\0';
+
+	return seq;
+}
+
+
+////////// FOR DEBUGGING ///////////
+
+// little endian
+/*
+ * Debugging helper: writes on stderr the `size` bytes found at `ptr`, in
+ * memory order, each one as 8 binary digits (most significant bit first)
+ * followed by a space. The output starts and ends with a newline.
+ */
+void print_bits(void* ptr, int32_t size)
+{
+	const uint8_t* bytes = (const uint8_t*) ptr;
+	int32_t byte_idx;
+	int32_t bit_idx;
+	uint8_t bit;
+
+	fprintf(stderr, "\n");
+
+	byte_idx = 0;
+	while (byte_idx < size)
+	{
+		// Walk the bits of the current byte from bit 7 down to bit 0
+		bit_idx = 8;
+		while (bit_idx-- > 0)
+		{
+			bit = (bytes[byte_idx] >> bit_idx) & 1;
+			fprintf(stderr, "%u", bit);
+		}
+		fprintf(stderr, " ");
+		byte_idx++;
+	}
+
+	fprintf(stderr, "\n");
+}
--- a/src/encode.h
+++ b/src/encode.h
@ -0,0 +1,95 @@
+/****************************************************************************
+ * Encoding header file	                                                    *
+ ****************************************************************************/
+
+/**
+ * @file encode.h
+ * @author Celine Mercier
+ * @date November 18th 2015
+ * @brief Header file for encoding DNA sequences.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "obiarray.h"
+
+
+#define NUC_MASK 0x3   /**< Binary: 11 to use when decoding */
+
+
+/**
+ * @brief enum for the 2-bits codes for each of the 4 nucleotides.
+ */
+enum
+{
+    NUC_A = 0x0,   /* binary: 00 */
+    NUC_C = 0x1,   /* binary: 01 */
+    NUC_G = 0x2,   /* binary: 10 */
+    NUC_T = 0x3,   /* binary: 11 */
+};
+
+
+/**
+ * @brief Checks if there are only 'atgcATGC' characters in a
+ *        character string.
+ *
+ * @param seq The sequence to check.
+ *
+ * @returns A boolean value indicating if there are only
+ *          'atgcATGC' characters in a character string.
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+bool only_ATGC(char* seq);
+
+
+/**
+ * @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
+ *
+ *    A or a : 00
+ *    C or c : 01
+ *    G or g : 10
+ *    T or t : 11
+ *
+ * @warning The DNA sequence must contain only 'atgcATGC' characters.
+ *
+ * @param seq The sequence to encode.
+ * @param length The length of the sequence to encode.
+ *
+ * @returns The encoded sequence.
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+byte_t* encode_seq_on_2_bits(char* seq, int32_t length);
+
+
+/**
+ * @brief Decodes a DNA sequence that is coded with each nucleotide on 2 bits.
+ *
+ *    A or a : 00
+ *    C or c : 01
+ *    G or g : 10
+ *    T or t : 11
+ *
+ * @param seq_b The encoded sequence to decode.
+ * @param length_seq The initial length of the sequence before it was encoded.
+ *
+ * @returns The decoded sequence ended with '\0'.
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
+
+
+////////// FOR DEBUGGING ///////////
+
+// little endian
+void print_bits(void* ptr, int32_t length);
+
--- a/src/obiarray.c
+++ b/src/obiarray.c
@ -24,6 +24,7 @@
 #include "obitypes.h"
 #include "obidebug.h"
 #include "private_at_functions.h"
+#include "encode.h"


 #define DEBUG_LEVEL 0	// TODO has to be defined somewhere else (cython compil flag?)
@ -446,6 +447,8 @@ int array_compare(byte_t* value_1, byte_t* value_2)
 	uint8_t size_2;
 	int32_t len_1;
 	int32_t len_2;
+	int32_t ini_len_1;
+	int32_t ini_len_2;
 	int32_t b;

 	//obidebug(1, "\nCOMPARING 1=%d,%.*s; 2=%d,%.*s", *((int32_t*)(value_1+1)), *((int32_t*)(value_1+1)), value_1+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_2+1)), *((int32_t*)(value_2+1)), value_2+BYTE_ARRAY_HEADER_SIZE);
@ -462,6 +465,15 @@ int array_compare(byte_t* value_1, byte_t* value_2)
 	if (len_1 != len_2)
 		return (len_1 - len_2);

+	if (size_1 != 8)
+	{
+		ini_len_1 = *((int32_t*)(value_1+5));
+		ini_len_2 = *((int32_t*)(value_2+5));
+
+		if (ini_len_1 != ini_len_2)
+			return (ini_len_1 - ini_len_2);
+	}
+
 	b = BYTE_ARRAY_HEADER_SIZE;
 	comp = 0;
 	while (!comp && (b < len_1+BYTE_ARRAY_HEADER_SIZE))
@ -475,7 +487,7 @@ int array_compare(byte_t* value_1, byte_t* value_2)

 size_t array_sizeof(byte_t* value)
 {
-	return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)) + 1);
+	return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)));
 }


@ -995,6 +1007,8 @@ index_t obi_array_add(OBIDMS_array_p array, byte_t* value)
 	(array->first)[idx] = data_size_used;

 	// Store the value itself at the end of the data
+//	fprintf(stderr, "\nMEMCOPYING TO STORE, with size %ld :", value_size);
+//	printBits(value_size, value);
 	memcpy((((array->data)->data)+data_size_used), value, value_size);

 	// Update the data size
@ -1079,8 +1093,8 @@ byte_t* obi_str_to_obibytes(char* value)
 	uint8_t size;

 	size = 8;
-	length = strlen(value);
-	value_b = (byte_t*) malloc(length + BYTE_ARRAY_HEADER_SIZE + 1);
+	length = strlen(value) + 1;		// +1 to store \0 at the end (makes retrieving faster)
+	value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length);
 	if (value_b == NULL)
 	{
 		obi_set_errno(OBI_ARRAY_ERROR);
@ -1090,7 +1104,8 @@ byte_t* obi_str_to_obibytes(char* value)

 	*(value_b) = size;

-	*((int32_t*)(value_b+1)) = length;
+	*((int32_t*)(value_b+1)) = length;	// TODO comment
+	*((int32_t*)(value_b+5)) = length;

 	strcpy(value_b+BYTE_ARRAY_HEADER_SIZE, value);

@ -1107,3 +1122,73 @@ const char* obi_obibytes_to_str(byte_t* value_b)
 	return value;
 }

+
+/*
+ * Builds the byte array (header + encoded value) storing a DNA sequence.
+ *
+ * Layout of the result:
+ *   byte  0     : size of one nucleotide in bits (2)
+ *   bytes 1-4   : length of the encoded value in bytes (int32_t)
+ *   bytes 5-8   : length of the sequence in nucleotides (int32_t)
+ *   bytes 9-... : the sequence coded on 2 bits per nucleotide
+ *
+ * Returns the byte array (to be freed by the caller), or NULL with obi_errno
+ * set if the sequence contains other characters than [atgcATGC] (4 bits
+ * encoding is not available yet), if it can not be encoded or if memory can
+ * not be allocated.
+ */
+byte_t* obi_seq_to_obibytes(char* seq)
+{
+	byte_t*  value_b;
+	byte_t*  encoded_seq;
+	int32_t  length;	   // length of the value (without the header) in bytes
+	int32_t  seq_length;   // length of the sequence in nucleotides
+	uint8_t  size;	  	   // size of one element in bits
+
+	// Only sequences that can be coded on 2 bits per nucleotide are handled for now
+	if (!only_ATGC(seq))
+	{
+		obi_set_errno(OBI_ARRAY_ERROR);
+		obidebug(1, "\nCan't encode a sequence containing other characters than [atgcATGC]");
+		return NULL;		// TODO encode_seq_on_4_bits(seq, seq_length)
+	}
+	size = 2;
+
+	// Set length: 4 nucleotides per byte, rounded up
+	seq_length = strlen(seq);
+	length = seq_length / 4 + ((seq_length % 4) != 0);
+
+	// Encode (a NULL buffer is only tolerated when there is nothing to store)
+	encoded_seq = encode_seq_on_2_bits(seq, seq_length);
+	if ((encoded_seq == NULL) && (length > 0))
+	{
+		obi_set_errno(OBI_ARRAY_ERROR);
+		obidebug(1, "\nError encoding a DNA sequence");
+		return NULL;
+	}
+
+	// Set the values in the byte array
+	value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length);
+	if (value_b == NULL)
+	{
+		obi_set_errno(OBI_ARRAY_ERROR);
+		obidebug(1, "\nError allocating memory for a byte array");
+		free(encoded_seq);
+		return NULL;
+	}
+
+	// The int32_t fields sit at odd offsets: they are copied byte by byte
+	// instead of being written through a misaligned int32_t pointer
+	*(value_b) = size;
+	memcpy(value_b+1, &length, sizeof(int32_t));
+	memcpy(value_b+5, &seq_length, sizeof(int32_t));
+
+	if (length > 0)
+		memcpy(value_b+BYTE_ARRAY_HEADER_SIZE, encoded_seq, length);
+
+	free(encoded_seq);
+
+	return value_b;
+}
+
+
+/*
+ * Decodes the DNA sequence stored in a byte array with a header.
+ *
+ * The size of one nucleotide in bits is read from byte 0 of the header and
+ * the number of nucleotides from bytes 5-8. Only sequences coded on 2 bits
+ * can be decoded for now.
+ *
+ * Returns the string allocated by decode_seq_on_2_bits(), or NULL if
+ * value_b is NULL, if the sequence is not coded on 2 bits or if decoding
+ * failed.
+ */
+const char* obi_obibytes_to_seq(byte_t* value_b)
+{
+	uint8_t size;	      // size of one element in bits
+	int32_t seq_length;   // length of the sequence in nucleotides
+
+	if (value_b == NULL)
+		return NULL;
+
+	size = *(value_b);
+	if (size != 2)
+		return NULL;		// TODO decode_seq_on_4_bits(value_b+BYTE_ARRAY_HEADER_SIZE, seq_length)
+
+	// The field sits at an odd offset: it is copied byte by byte instead of
+	// being read through a misaligned int32_t pointer
+	memcpy(&seq_length, value_b+5, sizeof(int32_t));
+
+	// Decode
+	return decode_seq_on_2_bits(value_b+BYTE_ARRAY_HEADER_SIZE, seq_length);
+}
+
--- a/src/obiarray.h
+++ b/src/obiarray.h
@ -29,7 +29,7 @@
                              	  	 */
 #define ARRAY_GROWTH_FACTOR (2)		/**< The growth factor when an array is enlarged.
                                	 */
-#define BYTE_ARRAY_HEADER_SIZE (5)  /**< The size of the header of a byte array.
+#define BYTE_ARRAY_HEADER_SIZE (9)  /**< The size of the header of a byte array.
                              	  	 */


@ -284,5 +284,34 @@ byte_t* obi_str_to_obibytes(char* value);
 const char* obi_obibytes_to_str(byte_t* value_b);


+/**
+ * @brief Converts a DNA sequence to a byte array with a header.
+ *
+ * @warning The byte array must be freed by the caller.
+ *
+ * @param seq The DNA sequence to convert.
+ *
+ * @returns A pointer to the byte array created.
+ * @retval NULL if an error occurred.
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+byte_t* obi_seq_to_obibytes(char* seq);
+
+
+/**
+ * @brief Converts a byte array to a DNA sequence.
+ *
+ * @param value_b The byte array to convert.
+ *
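+ * @returns A pointer to the decoded DNA sequence, stored in a newly allocated character string.
+ * @retval NULL if the sequence is not coded on 2 bits or if an error occurred.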
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+const char* obi_obibytes_to_seq(byte_t* value_b);
+
+
 #endif /* OBIARRAY_H_ */

--- a/src/obidmscolumn.c
+++ b/src/obidmscolumn.c
@ -533,12 +533,12 @@ OBIDMS_column_p obi_create_column(OBIDMS_p    dms,
 		obidebug(1, "\nCan't create column because of empty column name");
 		return NULL;
 	}
-	if ((data_type < 1) || (data_type > 5))
+	if ((data_type < 1) || (data_type > 6))
 	{
 		obidebug(1, "\nCan't create column because of invalid data type");
 		return NULL;
 	}
-	if ((data_type == 5) && (array_name == NULL))
+	if (((data_type == 5) || (data_type == 6)) && (array_name == NULL))
 	{
 		obidebug(1, "\nCan't create column because of empty array name");
 		return NULL;
@ -701,8 +701,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p    dms,
 	if (comments != NULL)
 		strncpy(header->comments, comments, COMMENTS_MAX_LENGTH);

-	// If the data type is OBI_IDX, the associated obi_array is opened or created
-	if (data_type == 5)
+	// If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created
+	if ((data_type == 5) || (data_type == 6))
 	{
 		array = obi_array(dms, array_name);
 		if (array == NULL)
@ -838,8 +838,8 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,

 	column->writable  = false;

-	// If the data type is OBI_IDX, the associated obi_array is opened or created
-	if ((column->header)->data_type == 5)
+	// If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created
+	if (((column->header)->data_type == 5) || ((column->header)->data_type == 6))
 	{
 		array = obi_array(dms, (column->header)->array_name);
 		if (array == NULL)
@ -1175,7 +1175,8 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
 					}
 					break;

-	case OBI_IDX:   for (i=start;i<end;i++)
+	case OBI_STR:
+	case OBI_SEQ:   for (i=start;i<end;i++)
 					{
 						*(((index_t*) (column->data)) + i) = OBIIdx_NA;
 					}
--- a/src/obidmscolumn_seq.c
+++ b/src/obidmscolumn_seq.c
@ -0,0 +1,120 @@
+/****************************************************************************
+ * OBIDMS_column_seq functions                                              *
+ ****************************************************************************/
+
+/**
+ * @file obidmscolumn_seq.c
+ * @author Celine Mercier
+ * @date November 18th 2015
+ * @brief Functions handling OBIColumns containing data in the form of indices referring to DNA sequences.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "obidmscolumn.h"
+#include "obitypes.h"
+#include "obierrno.h"
+#include "obidebug.h"
+#include "obiarray.h"
+
+
+#define DEBUG_LEVEL 0	// TODO has to be defined somewhere else (cython compil flag?)
+
+
+/**********************************************************************
+ *
+ * D E F I N I T I O N   O F   T H E   P U B L I C   F U N C T I O N S
+ *
+ **********************************************************************/
+
+/*
+ * Stores a DNA sequence in the obiarray associated with the column, and
+ * writes the index returned by the obiarray in the column, at element
+ * element_idx of line line_nb. The column is enlarged as many times as
+ * needed to hold line_nb, and its number of lines used is updated.
+ *
+ * Returns 0 on success, -1 if line_nb is not lower than MAXIMUM_LINE_COUNT,
+ * if the column can not be enlarged, if the sequence can not be converted
+ * to a byte array or if it can not be added in the obiarray.
+ */
+int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value)
+{
+	byte_t* value_b;
+	index_t idx;
+
+	// Check that the line number is not greater than the maximum allowed
+	if (line_nb >= MAXIMUM_LINE_COUNT)
+	{
+		obi_set_errno(OBICOL_UNKNOWN_ERROR);
+		obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
+    	return -1;
+	}
+
+	// Check if the file needs to be enlarged
+	while ((line_nb+1) > (column->header)->line_count)
+	{
+		// Enlarge the file
+		if (obi_enlarge_column(column) < 0)
+	    	return -1;
+	}
+
+	// Update lines used
+	if ((line_nb+1) > (column->header)->lines_used)
+		(column->header)->lines_used = line_nb+1;
+
+	// Encode the value on a byte array with a header
+	value_b = obi_seq_to_obibytes(value);
+	if (value_b == NULL)
+		return -1;
+
+	// Add in the obiarray. The byte array is only a temporary buffer for this
+	// call (it is freed below on success too), so it is released right away
+	// whatever the outcome.
+	idx = obi_array_add(column->array, value_b);
+	free(value_b);
+	if (idx == -1)
+		return -1;
+
+	// Add the value's index in the column
+	*(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = idx;
+
+	return 0;
+}
+
+
+/*
+ * Reads the obiarray index stored at element element_idx of line line_nb of
+ * the column, and returns the decoding of the byte array found at that index
+ * in the obiarray associated with the column.
+ *
+ * Returns "\0" if the line is beyond the number of lines used (obi_errno is
+ * then set) or if the stored index is the NA value.
+ */
+const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
+{
+	index_t* cell;
+	index_t  array_idx;
+	byte_t*  stored_value;
+
+	if ((column->header)->lines_used < (line_nb+1))
+	{
+		obi_set_errno(OBICOL_UNKNOWN_ERROR);
+		obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
+		return "\0";		// TODO
+	}
+
+	// Locate the cell: start of the line first, then the element in the line
+	cell = ((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line));
+	cell = cell + element_idx;
+	array_idx = *cell;
+
+	// Check NA
+	if (array_idx == OBIIdx_NA)
+		return "\0";		// TODO
+
+	stored_value = obi_array_get(column->array, array_idx);
+
+	return obi_obibytes_to_seq(stored_value);
+}
+
+
+/*
+ * Same as obi_column_set_obiseq_with_elt_idx(), the element being designated
+ * by its name instead of its index in the line.
+ *
+ * Returns 0 on success, -1 if the name can not be resolved to an index or if
+ * the value can not be set.
+ */
+int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, char* value)
+{
+	index_t idx_in_line = obi_column_get_element_index_from_name(column, element_name);
+
+	if (idx_in_line == OBIIdx_NA)
+		return -1;
+
+	// Any negative status of the index based setter is reported as -1
+	return (obi_column_set_obiseq_with_elt_idx(column, line_nb, idx_in_line, value) < 0) ? -1 : 0;
+}
+
+
+/*
+ * Same as obi_column_get_obiseq_with_elt_idx(), the element being designated
+ * by its name instead of its index in the line.
+ *
+ * Returns "\0" if the name can not be resolved to an index.
+ */
+const char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
+{
+	index_t idx_in_line = obi_column_get_element_index_from_name(column, element_name);
+
+	if (idx_in_line != OBIIdx_NA)
+		return obi_column_get_obiseq_with_elt_idx(column, line_nb, idx_in_line);
+
+	return "\0";
+}
+
--- a/src/obidmscolumn_seq.h
+++ b/src/obidmscolumn_seq.h
@ -0,0 +1,101 @@
+/****************************************************************************
+ * OBIDMS_column_seq header file                                            *
+ ****************************************************************************/
+
+/**
+ * @file obidmscolumn_seq.h
+ * @author Celine Mercier
+ * @date November 18th 2015
+ * @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to DNA sequences.
+ */
+
+
+#ifndef OBIDMSCOLUMN_SEQ_H_
+#define OBIDMSCOLUMN_SEQ_H_
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "obidmscolumn.h"
+#include "obitypes.h"
+
+
+/**
+ * @brief Sets a value in an OBIDMS column containing data in the form of indices referring
+ * to DNA sequences in an obiarray, using the index of the element in the line.
+ *
+ * @warning Pointers returned by obi_open_column() don't allow writing.
+ *
+ * @param column A pointer as returned by obi_create_column() or obi_clone_column().
+ * @param line_nb The number of the line where the value should be set.
+ * @param element_idx The index of the element that should be set in the line.
+ * @param value The value that should be set.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value);
+
+
+/**
+ * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
+ * to DNA sequences in an obiarray, using the index of the element in the line.
+ *
+ * @param column A pointer as returned by obi_create_column().
+ * @param line_nb The number of the line where the value should be recovered.
+ * @param element_idx The index of the element that should be recovered in the line.
+ *
+ * @returns The recovered value.
+ * @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx);
+
+
+/**
+ * @brief Sets a value in an OBIDMS column containing data in the form of indices referring
+ * to DNA sequences in an obiarray, using the name of the element in the line.
+ *
+ * @warning Pointers returned by obi_open_column() don't allow writing.
+ *
+ * @param column A pointer as returned by obi_create_column() or obi_clone_column().
+ * @param line_nb The number of the line where the value should be set.
+ * @param element_name The name of the element that should be set in the line.
+ * @param value The value that should be set.
+ *
+ * @returns An integer value indicating the success of the operation.
+ * @retval 0 on success.
+ * @retval -1 if an error occurred.
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, char* value);
+
+
+/**
+ * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
+ * to DNA sequences in an obiarray, using the name of the element in the line.
+ *
+ * @param column A pointer as returned by obi_create_column() or obi_clone_column().
+ * @param line_nb The number of the line where the value should be recovered.
+ * @param element_name The name of the element that should be recovered in the line.
+ *
+ * @returns The recovered value.
+ * @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
+ *
+ * @since November 2015
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+const char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
+
+
+#endif /* OBIDMSCOLUMN_SEQ_H_ */
+
--- a/src/obidmscolumn_str.h
+++ b/src/obidmscolumn_str.h
@ -97,5 +97,5 @@ int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb,
 const char* obi_column_get_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);


-#endif /* OBIDMSCOLUMN_IDX_H_ */
+#endif /* OBIDMSCOLUMN_STR_H_ */

--- a/src/obitypes.c
+++ b/src/obitypes.c
@ -40,7 +40,10 @@ size_t obi_sizeof(OBIType_t type)
 	case OBI_CHAR:  size = sizeof(obichar_t);
 					break;

-	case OBI_IDX:   size = sizeof(index_t);
+	case OBI_STR:   size = sizeof(index_t);
+					break;
+
+	case OBI_SEQ:   size = sizeof(index_t);
 					break;

 	default:        size = 0;
@ -90,7 +93,10 @@ char* name_data_type(int data_type)
 		case OBI_CHAR:  name = strdup("OBI_CHAR");
 						break;

-		case OBI_IDX:   name = strdup("OBI_IDX");
+		case OBI_STR:   name = strdup("OBI_STR");
+						break;
+
+		case OBI_SEQ:   name = strdup("OBI_SEQ");
 						break;
 	}

--- a/src/obitypes.h
+++ b/src/obitypes.h
@ -44,7 +44,8 @@ typedef enum OBIType {
 	OBI_FLOAT,					/**< a floating value (C type : double) */
 	OBI_BOOL,					/**< a boolean true/false value, see obibool_t enum */
 	OBI_CHAR,					/**< a character (C type : char) */
-	OBI_IDX					    /**< an index in a data structure (C type : int64_t) */
+	OBI_STR,				    /**< an index in a data structure (C type : int64_t) referring to a character string*/
+	OBI_SEQ					    /**< an index in a data structure (C type : int64_t) referring to a DNA sequence*/
 } OBIType_t, *OBIType_p;


@ -52,7 +53,7 @@ typedef int64_t index_t;
 typedef int32_t obiint_t;
 typedef double obifloat_t;
 typedef char obichar_t;
-
+// TODO same for obistr_t and obiseq_t ?

 /**
 * @brief Union used to compute the NA value of the OBI_FLOAT OBIType.