New column type for DNA sequences. Only for those coded on 2 bits (only
'ATGCatgc') for now.
This commit is contained in:
180
src/encode.c
Normal file
180
src/encode.c
Normal file
@ -0,0 +1,180 @@
|
||||
/****************************************************************************
|
||||
* Encoding functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file encode.c
|
||||
* @author Celine Mercier
|
||||
* @date November 18th 2015
|
||||
* @brief Functions encoding DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "encode.h"
|
||||
#include "obiarray.h"
|
||||
#include "obidebug.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
// TODO: endianness problem?
|
||||
|
||||
|
||||
|
||||
/**
 * Checks that a character string contains only 'atgcATGC' characters.
 *
 * @param seq The NUL-terminated sequence to check.
 *
 * @returns true if every character of seq is one of 'atgcATGC' (an empty
 *          string therefore yields true), false if another character is
 *          found or if seq is NULL.
 */
bool only_ATGC(char* seq)
{
    const char* c;

    // A NULL pointer is not a valid sequence (and must not be dereferenced)
    if (seq == NULL)
        return false;

    for (c = seq; *c != '\0'; c++)
    {
        switch (*c)
        {
            case 'A': case 'T': case 'G': case 'C':
            case 'a': case 't': case 'g': case 'c':
                break;
            default:
                return false;
        }
    }

    return true;
}
|
||||
|
||||
|
||||
byte_t* encode_seq_on_2_bits(char* seq, int32_t length) // TODO shift = 2
|
||||
{
|
||||
byte_t* seq_b;
|
||||
uint8_t shift;
|
||||
int32_t length_b;
|
||||
int32_t i;
|
||||
|
||||
// fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>Encoding sequence %s", seq);
|
||||
|
||||
length_b = ceil((double) length / (double) 4.0);
|
||||
|
||||
// fprintf(stderr, "\nLength: %d", length_b);
|
||||
|
||||
seq_b = (byte_t*) malloc(length_b * sizeof(byte_t));
|
||||
|
||||
memset(seq_b, 0, length_b);
|
||||
|
||||
for (i=0; i<length; i++)
|
||||
{
|
||||
shift = 6 - 2*(i%4);
|
||||
// fprintf(stderr, "\nshift: %u", shift);
|
||||
|
||||
switch (seq[i])
|
||||
{
|
||||
case 'a':
|
||||
case 'A':
|
||||
seq_b[i/4] |= NUC_A << shift;
|
||||
// fprintf(stderr, "\nIn byte %d, writing A:", i/4);
|
||||
// print_bits(seq_b, length_b);
|
||||
break;
|
||||
case 'c':
|
||||
case 'C':
|
||||
seq_b[i/4] |= NUC_C << shift;
|
||||
// fprintf(stderr, "\nIn byte %d, writing C:", i/4);
|
||||
// print_bits(seq_b, length_b);
|
||||
break;
|
||||
case 'g':
|
||||
case 'G':
|
||||
seq_b[i/4] |= NUC_G << shift;
|
||||
// fprintf(stderr, "\nIn byte %d, writing G:", i/4);
|
||||
// print_bits(seq_b, length_b);
|
||||
break;
|
||||
case 't':
|
||||
case 'T':
|
||||
seq_b[i/4] |= NUC_T << shift;
|
||||
// fprintf(stderr, "\nIn byte %d, writing T:", i/4);
|
||||
// print_bits(seq_b, length_b);
|
||||
break;
|
||||
default:
|
||||
obidebug(1, "\nInvalid nucleotide base when encoding (not [atgcATGC])");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// fprintf(stderr, "\n>>>>>>>>>Encoded:");
|
||||
// print_bits(seq_b, length_b);
|
||||
|
||||
return seq_b;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Decodes a DNA sequence that is coded with each nucleotide on 2 bits,
 * 4 nucleotides per byte, the first nucleotide of each byte being in the
 * two most significant bits.
 *
 * @warning The returned string is allocated here and must be freed by the caller.
 *
 * @param seq_b The encoded sequence.
 * @param length_seq The number of nucleotides to decode.
 *
 * @returns The decoded sequence in lower case, ended with '\0'.
 * @retval NULL if length_seq is negative or if the allocation failed.
 */
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
{
    // Character of each 2-bit code; every value that NUC_MASK can select has an entry
    static const char nuc_chars[NUC_MASK + 1] =
    {
        [NUC_A] = 'a',
        [NUC_C] = 'c',
        [NUC_G] = 'g',
        [NUC_T] = 't'
    };

    char*   seq;
    int32_t i;
    uint8_t shift;
    uint8_t nuc;

    // A negative length would make the final '\0' be written before the buffer
    if (length_seq < 0)
    {
        obidebug(1, "\nInvalid sequence length when decoding");
        return NULL;
    }

    seq = (char*) malloc(((size_t) length_seq + 1) * sizeof(char));
    if (seq == NULL)
    {
        obidebug(1, "\nError allocating memory for a decoded sequence");
        return NULL;
    }

    for (i=0; i<length_seq; i++)
    {
        shift = 6 - 2*(i % 4);
        // Cast to unsigned before shifting so that the result does not depend on the signedness of byte_t
        nuc = (((uint8_t) seq_b[i/4]) >> shift) & NUC_MASK;
        seq[i] = nuc_chars[nuc];
    }

    seq[length_seq] = '\0';

    return seq;
}
|
||||
|
||||
|
||||
////////// FOR DEBUGGING ///////////
|
||||
|
||||
// little endian
|
||||
/**
 * Debugging helper: writes to stderr the bits of 'size' bytes starting at ptr.
 * Bytes are printed in memory order, each one from its most significant bit
 * to its least significant bit, and followed by a space.
 *
 * @param ptr  The address of the first byte to print.
 * @param size The number of bytes to print.
 */
void print_bits(void* ptr, int32_t size)
{
    const uint8_t* bytes = (const uint8_t*) ptr;
    int32_t        byte_idx;
    int32_t        bit_idx;
    unsigned int   bit;

    fprintf(stderr, "\n");

    for (byte_idx = 0; byte_idx < size; byte_idx++)
    {
        bit_idx = 8;
        while (bit_idx-- > 0)
        {
            bit = (bytes[byte_idx] >> bit_idx) & 1u;
            fprintf(stderr, "%u", bit);
        }
        fprintf(stderr, " ");
    }

    fprintf(stderr, "\n");
}
|
95
src/encode.h
Normal file
95
src/encode.h
Normal file
@ -0,0 +1,95 @@
|
||||
/****************************************************************************
|
||||
* Encoding header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file encode.h
|
||||
* @author Celine Mercier
|
||||
* @date November 18th 2015
|
||||
* @brief Header file for encoding DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "obiarray.h"
|
||||
|
||||
|
||||
#define NUC_MASK 0x3 /**< Binary: 11 to use when decoding */
|
||||
|
||||
|
||||
/**
 * @brief 2-bit codes of the 4 nucleotides.
 *
 * The codes are the values 0 to 3, i.e. exactly the values selected by NUC_MASK.
 */
enum
{
    NUC_A = 0,      /**< binary: 00 */
    NUC_C = 1,      /**< binary: 01 */
    NUC_G = 2,      /**< binary: 10 */
    NUC_T = 3       /**< binary: 11 */
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if there are only 'atgcATGC' characters in a
|
||||
* character string.
|
||||
*
|
||||
* @param seq The sequence to check.
|
||||
*
|
||||
* @returns A boolean value indicating if there are only
|
||||
* 'atgcATGC' characters in a character string.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
bool only_ATGC(char* seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
|
||||
*
|
||||
* A or a : 00
|
||||
* C or c : 01
|
||||
* G or g : 10
|
||||
* T or t : 11
|
||||
*
|
||||
* @warning The DNA sequence must contain only 'atgcATGC' characters.
|
||||
*
|
||||
* @param seq The sequence to encode.
|
||||
* @param length The length of the sequence to encode.
|
||||
*
|
||||
* @returns The encoded sequence.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* encode_seq_on_2_bits(char* seq, int32_t length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Decodes a DNA sequence that is coded with each nucleotide on 2 bits.
|
||||
*
|
||||
* A or a : 00
|
||||
* C or c : 01
|
||||
* G or g : 10
|
||||
* T or t : 11
|
||||
*
|
||||
* @param seq_b The encoded sequence to decode.
|
||||
* @param length_seq The initial length of the sequence before it was encoded.
|
||||
*
|
||||
* @returns The decoded sequence ended with '\0'.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
|
||||
|
||||
|
||||
////////// FOR DEBUGGING ///////////
|
||||
|
||||
// little endian
|
||||
void print_bits(void* ptr, int32_t length);
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "obitypes.h"
|
||||
#include "obidebug.h"
|
||||
#include "private_at_functions.h"
|
||||
#include "encode.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
@ -446,6 +447,8 @@ int array_compare(byte_t* value_1, byte_t* value_2)
|
||||
uint8_t size_2;
|
||||
int32_t len_1;
|
||||
int32_t len_2;
|
||||
int32_t ini_len_1;
|
||||
int32_t ini_len_2;
|
||||
int32_t b;
|
||||
|
||||
//obidebug(1, "\nCOMPARING 1=%d,%.*s; 2=%d,%.*s", *((int32_t*)(value_1+1)), *((int32_t*)(value_1+1)), value_1+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_2+1)), *((int32_t*)(value_2+1)), value_2+BYTE_ARRAY_HEADER_SIZE);
|
||||
@ -462,6 +465,15 @@ int array_compare(byte_t* value_1, byte_t* value_2)
|
||||
if (len_1 != len_2)
|
||||
return (len_1 - len_2);
|
||||
|
||||
if (size_1 != 8)
|
||||
{
|
||||
ini_len_1 = *((int32_t*)(value_1+5));
|
||||
ini_len_2 = *((int32_t*)(value_2+5));
|
||||
|
||||
if (ini_len_1 != ini_len_2)
|
||||
return (ini_len_1 - ini_len_2);
|
||||
}
|
||||
|
||||
b = BYTE_ARRAY_HEADER_SIZE;
|
||||
comp = 0;
|
||||
while (!comp && (b < len_1+BYTE_ARRAY_HEADER_SIZE))
|
||||
@ -475,7 +487,7 @@ int array_compare(byte_t* value_1, byte_t* value_2)
|
||||
|
||||
size_t array_sizeof(byte_t* value)
|
||||
{
|
||||
return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)) + 1);
|
||||
return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)));
|
||||
}
|
||||
|
||||
|
||||
@ -995,6 +1007,8 @@ index_t obi_array_add(OBIDMS_array_p array, byte_t* value)
|
||||
(array->first)[idx] = data_size_used;
|
||||
|
||||
// Store the value itself at the end of the data
|
||||
// fprintf(stderr, "\nMEMCOPYING TO STORE, with size %ld :", value_size);
|
||||
// printBits(value_size, value);
|
||||
memcpy((((array->data)->data)+data_size_used), value, value_size);
|
||||
|
||||
// Update the data size
|
||||
@ -1079,8 +1093,8 @@ byte_t* obi_str_to_obibytes(char* value)
|
||||
uint8_t size;
|
||||
|
||||
size = 8;
|
||||
length = strlen(value);
|
||||
value_b = (byte_t*) malloc(length + BYTE_ARRAY_HEADER_SIZE + 1);
|
||||
length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster)
|
||||
value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length);
|
||||
if (value_b == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_ARRAY_ERROR);
|
||||
@ -1090,7 +1104,8 @@ byte_t* obi_str_to_obibytes(char* value)
|
||||
|
||||
*(value_b) = size;
|
||||
|
||||
*((int32_t*)(value_b+1)) = length;
|
||||
*((int32_t*)(value_b+1)) = length; // TODO comment
|
||||
*((int32_t*)(value_b+5)) = length;
|
||||
|
||||
strcpy(value_b+BYTE_ARRAY_HEADER_SIZE, value);
|
||||
|
||||
@ -1107,3 +1122,73 @@ const char* obi_obibytes_to_str(byte_t* value_b)
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
byte_t* obi_seq_to_obibytes(char* seq)
|
||||
{
|
||||
byte_t* value_b;
|
||||
int32_t length; // length of the value (without the header) in bytes
|
||||
uint8_t size; // size of one element in bits
|
||||
int32_t seq_length;
|
||||
byte_t* encoded_seq;
|
||||
|
||||
// Check if just ATGC and set size of a nucleotide accordingly (2 bits or 4 bits)
|
||||
//fprintf(stderr, "\nonly ATGC = %d", only_ATGC(seq));
|
||||
if (only_ATGC(seq))
|
||||
size = 2;
|
||||
else
|
||||
size = 4;
|
||||
|
||||
// Set length
|
||||
seq_length = strlen(seq);
|
||||
if (size == 2)
|
||||
length = ceil((double) seq_length / (double) 4.0);
|
||||
else // size == 4
|
||||
length = ceil((double) seq_length / (double) 2.0);
|
||||
|
||||
// Encode
|
||||
if (size == 2)
|
||||
encoded_seq = encode_seq_on_2_bits(seq, seq_length);
|
||||
else // size == 4
|
||||
return NULL;
|
||||
// encoded_seq = encode_seq_on_4_bits(seq, seq_length);
|
||||
|
||||
// Set the values in the byte array
|
||||
value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length);
|
||||
|
||||
*(value_b) = size;
|
||||
*((int32_t*)(value_b+1)) = length;
|
||||
*((int32_t*)(value_b+5)) = seq_length;
|
||||
|
||||
//fprintf(stderr, "\nstored seq length : %d\n", *((int32_t*)(value_b+5)));
|
||||
|
||||
memcpy(value_b+BYTE_ARRAY_HEADER_SIZE, encoded_seq, length);
|
||||
|
||||
//obidebug(1, "\n\nENCODED VALUE_B = ");
|
||||
//printBits(((*((int32_t*)(value_b+1)))+BYTE_ARRAY_HEADER_SIZE), value_b);
|
||||
|
||||
free(encoded_seq);
|
||||
|
||||
return value_b;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Converts a byte array with a header to a DNA sequence.
 *
 * The first byte of the header is read as the size of one element in bits,
 * and the 32 bits integer at offset 5 as the number of nucleotides to decode.
 * Only byte arrays coded on 2 bits are handled for now.
 *
 * @param value_b The byte array to convert.
 *
 * @returns The sequence as returned by decode_seq_on_2_bits().
 *          NOTE(review): decode_seq_on_2_bits() is expected to return a newly
 *          allocated string, in which case the caller owns it despite the
 *          const qualifier - to be confirmed.
 * @retval NULL if value_b is NULL, if the element size is not 2 bits,
 *         or if the decoding failed.
 */
const char* obi_obibytes_to_seq(byte_t* value_b)
{
    int32_t seq_length;     // length of the initial sequence in nucleotides
    uint8_t size;           // size of one element in bits

    if (value_b == NULL)
        return NULL;

    size = *(value_b);

    // Decode: only the 2 bits encoding is implemented
    if (size != 2)
        return NULL;
        // TODO decode_seq_on_4_bits(value_b+BYTE_ARRAY_HEADER_SIZE, seq_length);

    // The field is not aligned on 4 bytes in the header: copy it instead of dereferencing a cast pointer
    memcpy(&seq_length, value_b+5, sizeof(seq_length));

    return decode_seq_on_2_bits(value_b+BYTE_ARRAY_HEADER_SIZE, seq_length);
}
|
||||
|
||||
|
@ -29,7 +29,7 @@
|
||||
*/
|
||||
#define ARRAY_GROWTH_FACTOR (2) /**< The growth factor when an array is enlarged.
|
||||
*/
|
||||
#define BYTE_ARRAY_HEADER_SIZE (5) /**< The size of the header of a byte array.
|
||||
#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array.
|
||||
*/
|
||||
|
||||
|
||||
@ -284,5 +284,34 @@ byte_t* obi_str_to_obibytes(char* value);
|
||||
const char* obi_obibytes_to_str(byte_t* value_b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a DNA sequence to a byte array with a header.
|
||||
*
|
||||
* @warning The byte array must be freed by the caller.
|
||||
*
|
||||
* @param seq The DNA sequence to convert.
|
||||
*
|
||||
* @returns A pointer to the byte array created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* obi_seq_to_obibytes(char* seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a byte array to a DNA sequence.
|
||||
*
|
||||
* @param value_b The byte array to convert.
|
||||
*
|
||||
* @returns A pointer to the DNA sequence contained in the byte array.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_obibytes_to_seq(byte_t* value_b);
|
||||
|
||||
|
||||
#endif /* OBIARRAY_H_ */
|
||||
|
||||
|
@ -533,12 +533,12 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
obidebug(1, "\nCan't create column because of empty column name");
|
||||
return NULL;
|
||||
}
|
||||
if ((data_type < 1) || (data_type > 5))
|
||||
if ((data_type < 1) || (data_type > 6))
|
||||
{
|
||||
obidebug(1, "\nCan't create column because of invalid data type");
|
||||
return NULL;
|
||||
}
|
||||
if ((data_type == 5) && (array_name == NULL))
|
||||
if (((data_type == 5) || (data_type == 6)) && (array_name == NULL))
|
||||
{
|
||||
obidebug(1, "\nCan't create column because of empty array name");
|
||||
return NULL;
|
||||
@ -701,8 +701,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
if (comments != NULL)
|
||||
strncpy(header->comments, comments, COMMENTS_MAX_LENGTH);
|
||||
|
||||
// If the data type is OBI_IDX, the associated obi_array is opened or created
|
||||
if (data_type == 5)
|
||||
// If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created
|
||||
if ((data_type == 5) || (data_type == 6))
|
||||
{
|
||||
array = obi_array(dms, array_name);
|
||||
if (array == NULL)
|
||||
@ -838,8 +838,8 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
|
||||
column->writable = false;
|
||||
|
||||
// If the data type is OBI_IDX, the associated obi_array is opened or created
|
||||
if ((column->header)->data_type == 5)
|
||||
// If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created
|
||||
if (((column->header)->data_type == 5) || ((column->header)->data_type == 6))
|
||||
{
|
||||
array = obi_array(dms, (column->header)->array_name);
|
||||
if (array == NULL)
|
||||
@ -1175,7 +1175,8 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
|
||||
}
|
||||
break;
|
||||
|
||||
case OBI_IDX: for (i=start;i<end;i++)
|
||||
case OBI_STR:
|
||||
case OBI_SEQ: for (i=start;i<end;i++)
|
||||
{
|
||||
*(((index_t*) (column->data)) + i) = OBIIdx_NA;
|
||||
}
|
||||
|
120
src/obidmscolumn_seq.c
Normal file
120
src/obidmscolumn_seq.c
Normal file
@ -0,0 +1,120 @@
|
||||
/****************************************************************************
|
||||
* OBIDMS_column_seq functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obidmscolumn_seq.c
|
||||
* @author Celine Mercier
|
||||
* @date November 18th 2015
|
||||
* @brief Functions handling OBIColumns containing data in the form of indices referring to DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "obidmscolumn.h"
|
||||
#include "obitypes.h"
|
||||
#include "obierrno.h"
|
||||
#include "obidebug.h"
|
||||
#include "obiarray.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**
 * Sets a DNA sequence in a column, using the index of the element in the line.
 * The sequence is encoded, added to the obiarray associated with the column,
 * and the index returned by obi_array_add() is stored in the column.
 *
 * @param column The column in which the value should be set.
 * @param line_nb The number of the line where the value should be set.
 * @param element_idx The index of the element that should be set in the line.
 * @param value The DNA sequence that should be set.
 *
 * @retval 0 on success.
 * @retval -1 if the line number is too big, or if enlarging the column,
 *         encoding the sequence or adding it to the obiarray failed.
 */
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value)
{
    byte_t* value_b;
    index_t idx;

    // Check that the line number is not greater than the maximum allowed
    if (line_nb >= MAXIMUM_LINE_COUNT)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
        return -1;
    }

    // Check if the file needs to be enlarged
    while ((line_nb+1) > (column->header)->line_count)
    {
        // Enlarge the file
        if (obi_enlarge_column(column) < 0)
            return -1;
    }

    // Update lines used
    if ((line_nb+1) > (column->header)->lines_used)
        (column->header)->lines_used = line_nb+1;

    // Encode the value on a byte array with a header
    value_b = obi_seq_to_obibytes(value);
    if (value_b == NULL)
        return -1;

    // Add in the obiarray
    idx = obi_array_add(column->array, value_b);

    // The byte array is not needed anymore, whether it could be added or not
    // (freeing it before checking idx avoids leaking it on failure)
    free(value_b);

    if (idx == -1)
        return -1;

    // Add the value's index in the column
    *(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = idx;

    return 0;
}
|
||||
|
||||
|
||||
/**
 * Recovers a DNA sequence from a column, using the index of the element in the line.
 * The index stored in the column is read, the corresponding byte array is fetched
 * with obi_array_get() and converted with obi_obibytes_to_seq().
 *
 * @param column The column from which the value should be recovered.
 * @param line_nb The number of the line where the value should be recovered.
 * @param element_idx The index of the element that should be recovered in the line.
 *
 * @returns The result of obi_obibytes_to_seq() on the stored byte array.
 * @retval "\0" if the line is beyond the lines used (obi_errno is then set)
 *         or if the stored index is OBIIdx_NA.
 */
const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
    index_t* indices;
    index_t  array_idx;

    // Refuse to read beyond the lines used
    if ((column->header)->lines_used < (line_nb+1))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
        return "\0"; // TODO
    }

    // The column data is read as a flat array of indices in the obiarray
    indices = (index_t*) (column->data);
    array_idx = *(indices + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);

    // Check NA
    if (array_idx == OBIIdx_NA)
    {
        return "\0"; // TODO
    }

    return obi_obibytes_to_seq(obi_array_get(column->array, array_idx));
}
|
||||
|
||||
|
||||
/**
 * Sets a DNA sequence in a column, using the name of the element in the line.
 * The name is resolved with obi_column_get_element_index_from_name(), then the
 * work is delegated to obi_column_set_obiseq_with_elt_idx().
 *
 * @param column The column in which the value should be set.
 * @param line_nb The number of the line where the value should be set.
 * @param element_name The name of the element that should be set in the line.
 * @param value The DNA sequence that should be set.
 *
 * @retval 0 on success.
 * @retval -1 if the element name could not be resolved or if setting the value failed.
 */
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, char* value)
{
    index_t resolved_idx = obi_column_get_element_index_from_name(column, element_name);

    if (resolved_idx == OBIIdx_NA)
    {
        return -1;
    }

    return (obi_column_set_obiseq_with_elt_idx(column, line_nb, resolved_idx, value) < 0) ? -1 : 0;
}
|
||||
|
||||
|
||||
/**
 * Recovers a DNA sequence from a column, using the name of the element in the line.
 * The name is resolved with obi_column_get_element_index_from_name(), then the
 * work is delegated to obi_column_get_obiseq_with_elt_idx().
 *
 * @param column The column from which the value should be recovered.
 * @param line_nb The number of the line where the value should be recovered.
 * @param element_name The name of the element that should be recovered in the line.
 *
 * @returns The result of obi_column_get_obiseq_with_elt_idx().
 * @retval "\0" if the element name could not be resolved.
 */
const char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
    index_t resolved_idx = obi_column_get_element_index_from_name(column, element_name);

    if (resolved_idx != OBIIdx_NA)
    {
        return obi_column_get_obiseq_with_elt_idx(column, line_nb, resolved_idx);
    }

    return "\0";
}
|
||||
|
101
src/obidmscolumn_seq.h
Normal file
101
src/obidmscolumn_seq.h
Normal file
@ -0,0 +1,101 @@
|
||||
/****************************************************************************
|
||||
* OBIDMS_column_seq header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obidmscolumn_seq.h
|
||||
* @author Celine Mercier
|
||||
* @date November 18th 2015
|
||||
* @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OBIDMSCOLUMN_SEQ_H_
|
||||
#define OBIDMSCOLUMN_SEQ_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "obidmscolumn.h"
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to DNA sequences in an obiarray, using the index of the element in the line.
|
||||
*
|
||||
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||
*
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be set.
|
||||
* @param element_idx The index of the element that should be set in the line.
|
||||
* @param value The value that should be set.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to DNA sequences in an obiarray, using the index of the element in the line.
|
||||
*
|
||||
* @param column A pointer as returned by obi_create_column().
|
||||
* @param line_nb The number of the line where the value should be recovered.
|
||||
* @param element_idx The index of the element that should be recovered in the line.
|
||||
*
|
||||
* @returns The recovered value.
|
||||
* @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to DNA sequences in an obiarray, using the name of the element in the line.
|
||||
*
|
||||
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||
*
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be set.
|
||||
* @param element_name The name of the element that should be set in the line.
|
||||
* @param value The value that should be set.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to DNA sequences in an obiarray, using the name of the element in the line.
|
||||
*
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be recovered.
|
||||
* @param element_name The name of the element that should be recovered in the line.
|
||||
*
|
||||
* @returns The recovered value.
|
||||
* @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
|
||||
|
||||
|
||||
#endif /* OBIDMSCOLUMN_SEQ_H_ */
|
||||
|
@ -97,5 +97,5 @@ int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb,
|
||||
const char* obi_column_get_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
|
||||
|
||||
|
||||
#endif /* OBIDMSCOLUMN_IDX_H_ */
|
||||
#endif /* OBIDMSCOLUMN_STR_H_ */
|
||||
|
||||
|
@ -40,7 +40,10 @@ size_t obi_sizeof(OBIType_t type)
|
||||
case OBI_CHAR: size = sizeof(obichar_t);
|
||||
break;
|
||||
|
||||
case OBI_IDX: size = sizeof(index_t);
|
||||
case OBI_STR: size = sizeof(index_t);
|
||||
break;
|
||||
|
||||
case OBI_SEQ: size = sizeof(index_t);
|
||||
break;
|
||||
|
||||
default: size = 0;
|
||||
@ -90,7 +93,10 @@ char* name_data_type(int data_type)
|
||||
case OBI_CHAR: name = strdup("OBI_CHAR");
|
||||
break;
|
||||
|
||||
case OBI_IDX: name = strdup("OBI_IDX");
|
||||
case OBI_STR: name = strdup("OBI_STR");
|
||||
break;
|
||||
|
||||
case OBI_SEQ: name = strdup("OBI_SEQ");
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,8 @@ typedef enum OBIType {
|
||||
OBI_FLOAT, /**< a floating value (C type : double) */
|
||||
OBI_BOOL, /**< a boolean true/false value, see obibool_t enum */
|
||||
OBI_CHAR, /**< a character (C type : char) */
|
||||
OBI_IDX /**< an index in a data structure (C type : int64_t) */
|
||||
OBI_STR, /**< an index in a data structure (C type : int64_t) referring to a character string*/
|
||||
OBI_SEQ /**< an index in a data structure (C type : int64_t) referring to a DNA sequence*/
|
||||
} OBIType_t, *OBIType_p;
|
||||
|
||||
|
||||
@ -52,7 +53,7 @@ typedef int64_t index_t;
|
||||
typedef int32_t obiint_t;
|
||||
typedef double obifloat_t;
|
||||
typedef char obichar_t;
|
||||
|
||||
// TODO same for obistr_t and obiseq_t ?
|
||||
|
||||
/**
|
||||
* @brief Union used to compute the NA value of the OBI_FLOAT OBIType.
|
||||
|
Reference in New Issue
Block a user