Renamed "Obi_byte_arrays" to "Obiblobs" and moved Obiblob functions to

separate obiblob.c and obiblob.h files
This commit is contained in:
Celine Mercier
2016-04-12 11:21:14 +02:00
parent c225cfd8b6
commit 375bfcce8a
8 changed files with 304 additions and 249 deletions

View File

@ -349,111 +349,6 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
} }
/**
 * @brief Builds a byte array structure (header + copy of the encoded value).
 *
 * The structure and the encoded bytes live in a single allocation of
 * sizeof(Obi_byte_array_t) + length_encoded_value bytes.
 *
 * @warning The byte array must be freed by the caller.
 *
 * @param encoded_value The encoded bytes to copy in the byte array.
 * @param element_size The number of bits on which each element is encoded.
 * @param length_encoded_value The length in bytes of the encoded value (must be >= 0).
 * @param length_decoded_value The length of the value once decoded.
 *
 * @returns A pointer to the byte array created.
 * @retval NULL if the length is negative or if the allocation failed.
 */
Obi_byte_array_p obi_byte_array(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value)
{
    Obi_byte_array_p byte_array;

    // A negative length would wrap around once converted to size_t,
    // both in the allocation size and in the memcpy() below
    if (length_encoded_value < 0)
    {
        obidebug(1, "\nError: negative encoded length given for a byte array");
        return NULL;
    }

    // Allocate the memory for the byte array structure.
    // calloc() rather than malloc(): byte arrays are hashed and stored as raw
    // bytes over sizeof(Obi_byte_array_t) + length_encoded_value, which includes
    // any padding the compiler inserts between the header fields. Zeroing the
    // allocation keeps those bytes identical for identical values.
    byte_array = calloc(1, sizeof(Obi_byte_array_t) + (size_t) length_encoded_value);
    if (byte_array == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for a byte array");
        return NULL;
    }

    // Store the number of bits on which each element is encoded
    byte_array->element_size = element_size;

    // Store the length (in bytes) of the encoded value
    byte_array->length_encoded_value = length_encoded_value;

    // Store the initial length (in bytes) of the decoded value
    byte_array->length_decoded_value = length_decoded_value;

    // Store the encoded value (nothing to copy for an empty value)
    if (length_encoded_value > 0)
        memcpy(byte_array->value, encoded_value, (size_t) length_encoded_value);

    return byte_array;
}
/**
 * @brief Converts a character string to a byte array with a header.
 *
 * The terminating '\0' is stored with the string, so the encoded and decoded
 * lengths are both strlen(value) + 1.
 *
 * @warning The byte array must be freed by the caller.
 *
 * @param value The character string to convert.
 *
 * @returns A pointer to the byte array created.
 * @retval NULL if value is NULL, too long to be described by an int32_t length,
 *         or if an error occurred while building the byte array.
 */
Obi_byte_array_p obi_str_to_obibytes(char* value)
{
    Obi_byte_array_p value_b;
    size_t           str_length;
    int32_t          length;

    if (value == NULL)
    {
        obidebug(1, "\nError encoding a character string in a byte array: NULL string");
        return NULL;
    }

    // The header stores lengths on 32 bits: refuse strings that would not fit
    // (the +1 below must not overflow either)
    str_length = strlen(value);
    if (str_length >= (size_t) INT32_MAX)
    {
        obidebug(1, "\nError encoding a character string in a byte array: string too long");
        return NULL;
    }

    // Compute the number of bytes on which the value will be encoded
    length = (int32_t) str_length + 1;    // +1 to store \0 at the end (makes retrieving faster)

    value_b = obi_byte_array(value, ELEMENT_SIZE_STR, length, length);
    if (value_b == NULL)
    {
        obidebug(1, "\nError encoding a character string in a byte array");
        return NULL;
    }

    return value_b;
}
/**
 * @brief Converts a byte array to a character string.
 *
 * No copy is made: the returned pointer refers to the bytes stored inside
 * the byte array itself.
 *
 * @param value_b The byte array to convert.
 *
 * @returns A pointer to the character string contained in the byte array.
 */
const char* obi_obibytes_to_str(Obi_byte_array_p value_b)
{
    // The debugging trace that printed every retrieved string on stderr was
    // removed: a getter must not write to the standard streams.
    return (const char*) value_b->value;
}
/**
 * @brief Converts a DNA sequence to a byte array with a header.
 *
 * The sequence is encoded on 2 bits per nucleotide when only_ATGC() accepts it,
 * and on 4 bits per nucleotide otherwise.
 *
 * @warning The byte array must be freed by the caller.
 *
 * @param seq The DNA sequence to convert.
 *
 * @returns A pointer to the byte array created.
 * @retval NULL if seq is NULL, too long to be described by an int32_t length,
 *         or if an error occurred while encoding or building the byte array.
 */
Obi_byte_array_p obi_seq_to_obibytes(char* seq)
{
    Obi_byte_array_p value_b;
    size_t           raw_length;
    int32_t          seq_length;
    int32_t          length_encoded_seq;    // length of the encoded sequence in bytes
    uint8_t          element_size;
    byte_t*          encoded_seq;

    if (seq == NULL)
    {
        obidebug(1, "\nError encoding a DNA sequence in a byte array: NULL sequence");
        return NULL;
    }

    // The header stores lengths on 32 bits: refuse sequences that would not fit
    raw_length = strlen(seq);
    if (raw_length > (size_t) INT32_MAX)
    {
        obidebug(1, "\nError encoding a DNA sequence in a byte array: sequence too long");
        return NULL;
    }
    seq_length = (int32_t) raw_length;

    // Check if just ATGC and encode accordingly.
    // The encoded length is the ceiling of seq_length / (nucleotides per byte),
    // computed with integers so that no floating point is involved.
    if (only_ATGC(seq))
    {
        element_size = ELEMENT_SIZE_SEQ_2;
        length_encoded_seq = seq_length / 4 + ((seq_length % 4) != 0);
        encoded_seq = encode_seq_on_2_bits(seq, seq_length);
    }
    else
    {
        element_size = ELEMENT_SIZE_SEQ_4;
        length_encoded_seq = seq_length / 2 + ((seq_length % 2) != 0);
        encoded_seq = encode_seq_on_4_bits(seq, seq_length);
    }

    if (encoded_seq == NULL)
        return NULL;

    value_b = obi_byte_array(encoded_seq, element_size, length_encoded_seq, seq_length);

    // The byte array holds its own copy of the encoded bytes
    free(encoded_seq);

    return value_b;
}
/**
 * @brief Converts a byte array to a DNA sequence.
 *
 * Dispatches on the element size stored in the header: 2 selects the 2-bit
 * decoder, any other value selects the 4-bit decoder.
 *
 * @param value_b The byte array to convert.
 *
 * @returns The value returned by the selected decoding function.
 */
const char* obi_obibytes_to_seq(Obi_byte_array_p value_b)
{
    const int32_t decoded_length = value_b->length_decoded_value;

    // Anything that is not encoded on 2 bits is handled as 4 bits
    if (value_b->element_size != 2)
        return decode_seq_on_4_bits(value_b->value, decoded_length);

    return decode_seq_on_2_bits(value_b->value, decoded_length);
}
// TODO same for int
///////////////////// FOR DEBUGGING /////////////////////////// ///////////////////// FOR DEBUGGING ///////////////////////////
//NOTE: The first byte is printed the first (at the left-most). //NOTE: The first byte is printed the first (at the left-most).

View File

@ -26,27 +26,6 @@
*/ */
#define NUC_MASK_4B 0xF /**< Binary: 1111 to use when decoding 4 bits sequences #define NUC_MASK_4B 0xF /**< Binary: 1111 to use when decoding 4 bits sequences
*/ */
#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string.
*/
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits.
*/
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits.
*/
/**
* @brief Byte array structure: a small header followed by the encoded value.
*
* The encoded bytes are stored in the trailing flexible array member, so a
* byte array is built as one allocation of
* sizeof(Obi_byte_array_t) + length_encoded_value bytes.
*
* NOTE(review): element_size is a single byte followed by an int32_t, so the
* compiler presumably inserts padding after it; any code reading the structure
* as raw bytes also reads that padding -- confirm it is always initialised.
*/
typedef struct Obi_byte_array {
uint8_t element_size; /**< Size in bits of one element from the value.
*/
int32_t length_encoded_value; /**< Length in bytes of the encoded value.
*/
int32_t length_decoded_value; /**< Length in bytes of the decoded value
* (includes the terminating NUL for character strings,
* number of nucleotides for DNA sequences).
*/
byte_t value[]; /**< Encoded value.
*/
} Obi_byte_array_t, *Obi_byte_array_p;
/** /**
@ -201,65 +180,6 @@ byte_t* encode_seq_on_4_bits(char* seq, int32_t length);
char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq); char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq);
/**
* @brief Converts a character string to a byte array with a header.
*
* The terminating NUL character is stored along with the string, so the
* encoded length is strlen(value) + 1.
*
* @warning The byte array must be freed by the caller.
*
* @param value The character string to convert.
*
* @returns A pointer to the byte array created.
* @retval NULL if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_byte_array_p obi_str_to_obibytes(char* value);
/**
* @brief Converts a byte array to a character string.
*
* No copy is made: the returned pointer refers to the bytes stored inside
* the byte array, so it must not be used once the byte array is gone.
*
* @param value_b The byte array to convert.
*
* @returns A pointer to the character string contained in the byte array.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const char* obi_obibytes_to_str(Obi_byte_array_p value_b);
/**
* @brief Converts a DNA sequence to a byte array with a header.
*
* The sequence is encoded on 2 bits per nucleotide when it only contains
* A, T, G and C, and on 4 bits per nucleotide otherwise.
*
* @warning The byte array must be freed by the caller.
*
* @param seq The DNA sequence to convert.
*
* @returns A pointer to the byte array created.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_byte_array_p obi_seq_to_obibytes(char* seq);
/**
* @brief Converts a byte array to a DNA sequence.
*
* The decoding function (2 bits or 4 bits per nucleotide) is chosen from the
* element size stored in the byte array header.
*
* NOTE(review): the result comes straight from the decode_seq_on_*_bits()
* functions; presumably it is allocated by them and owned by the caller --
* confirm who is expected to free it.
*
* @param value_b The byte array to convert.
*
* @returns A pointer to the DNA sequence contained in the byte array.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const char* obi_obibytes_to_seq(Obi_byte_array_p value_b); // TODO move to encode source files
////////// FOR DEBUGGING /////////// ////////// FOR DEBUGGING ///////////
// little endian // little endian

View File

@ -22,6 +22,7 @@
#include "bloom.h" #include "bloom.h"
#include "crc64.h" #include "crc64.h"
#include "obiavl.h" #include "obiavl.h"
#include "obiblob.h"
#include "obierrno.h" #include "obierrno.h"
#include "obitypes.h" #include "obitypes.h"
#include "obidebug.h" #include "obidebug.h"
@ -270,7 +271,7 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group);
* The function checks a bloom filter. No false negatives, possible false positives. * The function checks a bloom filter. No false negatives, possible false positives.
* *
* @param avl A pointer to the AVL tree structure. * @param avl A pointer to the AVL tree structure.
* @param value A pointer to the byte array structure. * @param value A pointer to the blob structure.
* *
* @retval 0 if the value is definitely not already stored in the AVL tree. * @retval 0 if the value is definitely not already stored in the AVL tree.
* @retval 1 if the value might already be stored in the AVL tree. * @retval 1 if the value might already be stored in the AVL tree.
@ -278,17 +279,17 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group);
* @since April 2016 * @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
int maybe_in_avl(OBIDMS_avl_p avl, Obi_byte_array_p value); int maybe_in_avl(OBIDMS_avl_p avl, Obi_blob_p value);
/** /**
* @brief Internal function comparing two byte arrays. * @brief Internal function comparing two blobs.
* *
* The encoding is compared first, then the length of the * The encoding is compared first, then the length of the
* values, then the values themselves. * values, then the values themselves.
* *
* @param value_1 A pointer to the first byte array structure. * @param value_1 A pointer to the first blob structure.
* @param value_2 A pointer to the second byte array structure. * @param value_2 A pointer to the second blob structure.
* *
* @returns A value < 0 if value_1 < value_2, * @returns A value < 0 if value_1 < value_2,
* a value > 0 if value_1 > value_2, * a value > 0 if value_1 > value_2,
@ -297,27 +298,27 @@ int maybe_in_avl(OBIDMS_avl_p avl, Obi_byte_array_p value);
* @since October 2015 * @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
int byte_array_compare(Obi_byte_array_p value_1, Obi_byte_array_p value_2); int blob_compare(Obi_blob_p value_1, Obi_blob_p value_2);
/** /**
* @brief Internal function calculating the size in bytes of a byte array. * @brief Internal function calculating the size in bytes of a blob.
* *
* @param value A pointer to the byte array structure. * @param value A pointer to the blob structure.
* *
* @returns The size of the byte array in bytes. * @returns The size of the blob in bytes.
* *
* @since October 2015 * @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
int byte_array_sizeof(Obi_byte_array_p value); int blob_sizeof(Obi_blob_p value);
/** /**
* @brief Internal function storing a value (byte array) in the data array referred to by an AVL tree. * @brief Internal function storing a value (blob) in the data array referred to by an AVL tree.
* *
* @param avl A pointer to the AVL tree structure. * @param avl A pointer to the AVL tree structure.
* @param value A pointer to the value (byte array structure). * @param value A pointer to the value (blob structure).
* *
* @returns The index of the stored value. * @returns The index of the stored value.
* @retval -1 if an error occurred. * @retval -1 if an error occurred.
@ -325,7 +326,7 @@ int byte_array_sizeof(Obi_byte_array_p value);
* @since December 2015 * @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_byte_array_p value); index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_blob_p value);
/** /**
@ -979,13 +980,13 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group)
} }
int maybe_in_avl(OBIDMS_avl_p avl, Obi_byte_array_p value) int maybe_in_avl(OBIDMS_avl_p avl, Obi_blob_p value)
{ {
return (bloom_check(&((avl->header)->bloom_filter), value, byte_array_sizeof(value))); return (bloom_check(&((avl->header)->bloom_filter), value, blob_sizeof(value)));
} }
int byte_array_compare(Obi_byte_array_p value_1, Obi_byte_array_p value_2) int blob_compare(Obi_blob_p value_1, Obi_blob_p value_2)
{ {
int comp; int comp;
int32_t b; int32_t b;
@ -1013,13 +1014,13 @@ int byte_array_compare(Obi_byte_array_p value_1, Obi_byte_array_p value_2)
} }
int byte_array_sizeof(Obi_byte_array_p value) int blob_sizeof(Obi_blob_p value)
{ {
return (sizeof(Obi_byte_array_t) + (value->length_encoded_value)); return (sizeof(Obi_blob_t) + (value->length_encoded_value));
} }
index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_byte_array_p value) index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_blob_p value)
{ {
index_t value_idx; index_t value_idx;
int value_size; int value_size;
@ -1027,7 +1028,7 @@ index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_byte_array_p value)
value_idx = ((avl->data)->header)->data_size_used; value_idx = ((avl->data)->header)->data_size_used;
// Grow the data if needed // Grow the data if needed
value_size = byte_array_sizeof(value); value_size = blob_sizeof(value);
while (((avl->data)->header)->data_size_max < (value_idx + value_size)) while (((avl->data)->header)->data_size_max < (value_idx + value_size))
{ {
if (grow_avl_data(avl->data) < 0) if (grow_avl_data(avl->data) < 0)
@ -2047,20 +2048,20 @@ int obi_close_avl_group(OBIDMS_avl_group_p avl_group)
} }
Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t idx) Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t idx)
{ {
return ((Obi_byte_array_p)(((avl->data)->data)+idx)); return ((Obi_blob_p)(((avl->data)->data)+idx));
} }
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value) index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value)
{ {
AVL_node_p node_to_add = NULL; AVL_node_p node_to_add = NULL;
AVL_node_p current_node; AVL_node_p current_node;
index_t next, parent; index_t next, parent;
index_t value_data_idx; index_t value_data_idx;
index_t node_idx; index_t node_idx;
Obi_byte_array_p to_compare; Obi_blob_p to_compare;
int comp; int comp;
int n; int n;
int depth; int depth;
@ -2068,7 +2069,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value)
n = 0; n = 0;
depth = 0; depth = 0;
crc = crc64((byte_t*)value, byte_array_sizeof(value)); crc = crc64((byte_t*)value, blob_sizeof(value));
// Check if first node // Check if first node
if (!((avl->header)->nb_items)) if (!((avl->header)->nb_items))
@ -2115,7 +2116,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value)
if (comp == 0) if (comp == 0)
{ // check if really same value { // check if really same value
to_compare = obi_avl_get(avl, current_node->value); to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value); comp = blob_compare(to_compare, value);
} }
if (comp > 0) if (comp > 0)
@ -2189,15 +2190,15 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value)
// Find if a value is already in an AVL tree // Find if a value is already in an AVL tree
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value) index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value)
{ {
int comp; int comp;
index_t next; index_t next;
Obi_byte_array_p to_compare; Obi_blob_p to_compare;
AVL_node_p current_node; AVL_node_p current_node;
uint64_t crc; uint64_t crc;
crc = crc64((byte_t*)value, byte_array_sizeof(value)); crc = crc64((byte_t*)value, blob_sizeof(value));
next = (avl->header)->root_idx; next = (avl->header)->root_idx;
while (next != -1) while (next != -1)
@ -2210,7 +2211,7 @@ index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value)
if (comp == 0) if (comp == 0)
{ // Check if really same value { // Check if really same value
to_compare = obi_avl_get(avl, current_node->value); to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value); comp = blob_compare(to_compare, value);
} }
if (comp > 0) if (comp > 0)
@ -2229,7 +2230,7 @@ index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value)
} }
Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx) Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx)
{ {
int32_t avl_idx; int32_t avl_idx;
index_t idx_in_avl; index_t idx_in_avl;
@ -2241,7 +2242,7 @@ Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx)
} }
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value) index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value)
{ {
int32_t index_in_avl; int32_t index_in_avl;
index_t index_with_avl; index_t index_with_avl;
@ -2286,7 +2287,7 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value)
} }
// Add in the current AVL // Add in the current AVL
bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, byte_array_sizeof(value)); bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, blob_sizeof(value));
index_in_avl = (int32_t) obi_avl_add((avl_group->sub_avls)[avl_group->current_avl_idx], value); index_in_avl = (int32_t) obi_avl_add((avl_group->sub_avls)[avl_group->current_avl_idx], value);
// Build the index containing the AVL index // Build the index containing the AVL index

View File

@ -6,7 +6,7 @@
* @file obiavl.h * @file obiavl.h
* @author Celine Mercier * @author Celine Mercier
* @date December 3rd 2015 * @date December 3rd 2015
* @brief Header file for handling AVL trees for storing and retrieving byte arrays (i.e. coding for character strings). * @brief Header file for handling AVL trees for storing and retrieving blobs (i.e. coding for character strings).
*/ */
@ -23,6 +23,7 @@
#include <stdbool.h> #include <stdbool.h>
#include "obidms.h" #include "obidms.h"
#include "obiblob.h"
#include "obitypes.h" #include "obitypes.h"
#include "bloom.h" #include "bloom.h"
#include "utils.h" #include "utils.h"
@ -314,28 +315,28 @@ int obi_close_avl_group(OBIDMS_avl_group_p avl_group);
/** /**
* @brief Recovers a value (byte array) in an AVL tree. * @brief Recovers a value (blob) in an AVL tree.
* *
* @warning The byte array recovered must be decoded to get the original value. * @warning The blob recovered must be decoded to get the original value.
* *
* @param avl A pointer to the AVL tree. * @param avl A pointer to the AVL tree.
* @param index The index of the value in the data array. * @param index The index of the value in the data array.
* *
* @returns A pointer to the byte array recovered. * @returns A pointer to the blob recovered.
* *
* @since December 2015 * @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t index); Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
/** /**
* @brief Adds a value (byte array) in an AVL tree NOT checking first if it is already in it. // TODO to discuss * @brief Adds a value (blob) in an AVL tree NOT checking first if it is already in it. // TODO to discuss
* *
* @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t). * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
* *
* @param avl A pointer to the AVL tree. * @param avl A pointer to the AVL tree.
* @param value The byte array to add in the AVL tree. * @param value The blob to add in the AVL tree.
* *
* @returns The index of the value newly added in the AVL tree. * @returns The index of the value newly added in the AVL tree.
* @retval -1 if an error occurred. * @retval -1 if an error occurred.
@ -343,16 +344,16 @@ Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
* @since December 2015 * @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value); index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value);
/** /**
* @brief Finds a value (byte array) in an AVL tree. * @brief Finds a value (blob) in an AVL tree.
* *
* @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t). * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
* *
* @param avl A pointer to the AVL tree. * @param avl A pointer to the AVL tree.
* @param value The byte array to add in the AVL tree. * @param value The blob to add in the AVL tree.
* *
* @returns The data index of the value. * @returns The data index of the value.
* @retval -1 if the value is not in the tree. * @retval -1 if the value is not in the tree.
@ -360,32 +361,32 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value);
* @since December 2015 * @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value); index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value);
/** /**
* @brief Recovers a value (byte array) in an AVL tree. * @brief Recovers a value (blob) in an AVL tree.
* *
* @warning The byte array recovered must be decoded to get the original value. * @warning The blob recovered must be decoded to get the original value.
* *
* @param avl_group A pointer to the AVL tree. * @param avl_group A pointer to the AVL tree.
* @param index The index of the value in the data array. * @param index The index of the value in the data array.
* *
* @returns A pointer to the byte array recovered. * @returns A pointer to the blob recovered.
* *
* @since April 2016 * @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx); Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
/** /**
* @brief Adds a value (byte array) in an AVL tree group, checking if it is already in it. * @brief Adds a value (blob) in an AVL tree group, checking if it is already in it.
* *
* @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t). * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
* *
* @param avl_group A pointer to the AVL tree group. * @param avl_group A pointer to the AVL tree group.
* @param value The byte array to add in the AVL tree group. * @param value The blob to add in the AVL tree group.
* *
* @returns The index of the value newly added in the AVL tree group. * @returns The index of the value newly added in the AVL tree group.
* @retval -1 if an error occurred. * @retval -1 if an error occurred.
@ -393,7 +394,7 @@ Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
* @since April 2016 * @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value); index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value);
#endif /* OBIAVL_H_ */ #endif /* OBIAVL_H_ */

130
src/obiblob.c Normal file
View File

@ -0,0 +1,130 @@
/****************************************************************************
* Obiblob functions *
****************************************************************************/
/**
* @file obiblob.c
* @author Celine Mercier
* @date April 11th 2016
* @brief Functions handling Obiblob structures.
*/
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "obierrno.h"
#include "obitypes.h" // For byte_t type
#include "obidebug.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO: endianness problem?
/**
 * @brief Builds a blob structure (header + copy of the encoded value).
 *
 * The structure and the encoded bytes live in a single allocation of
 * sizeof(Obi_blob_t) + length_encoded_value bytes.
 *
 * @warning The blob must be freed by the caller.
 *
 * @param encoded_value The encoded bytes to copy in the blob.
 * @param element_size The number of bits on which each element is encoded.
 * @param length_encoded_value The length in bytes of the encoded value (must be >= 0).
 * @param length_decoded_value The length of the value once decoded.
 *
 * @returns A pointer to the blob created.
 * @retval NULL if the length is negative or if the allocation failed.
 */
Obi_blob_p obi_blob(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value)
{
    Obi_blob_p blob;

    // A negative length would wrap around once converted to size_t,
    // both in the allocation size and in the memcpy() below
    if (length_encoded_value < 0)
    {
        obidebug(1, "\nError: negative encoded length given for a blob");
        return NULL;
    }

    // Allocate the memory for the blob structure.
    // calloc() rather than malloc(): blobs are hashed and stored as raw bytes
    // over sizeof(Obi_blob_t) + length_encoded_value, which includes any padding
    // the compiler inserts between the header fields. Zeroing the allocation
    // keeps those bytes identical for identical values.
    blob = calloc(1, sizeof(Obi_blob_t) + (size_t) length_encoded_value);
    if (blob == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a blob");
        return NULL;
    }

    // Store the number of bits on which each element is encoded
    blob->element_size = element_size;

    // Store the length (in bytes) of the encoded value
    blob->length_encoded_value = length_encoded_value;

    // Store the initial length (in bytes) of the decoded value
    blob->length_decoded_value = length_decoded_value;

    // Store the encoded value (nothing to copy for an empty value)
    if (length_encoded_value > 0)
        memcpy(blob->value, encoded_value, (size_t) length_encoded_value);

    return blob;
}
/**
 * @brief Converts a character string to a blob.
 *
 * The terminating '\0' is stored with the string, so the encoded and decoded
 * lengths are both strlen(value) + 1.
 *
 * @warning The blob must be freed by the caller.
 *
 * @param value The character string to convert.
 *
 * @returns A pointer to the blob created.
 * @retval NULL if value is NULL, too long to be described by an int32_t length,
 *         or if an error occurred while building the blob.
 */
Obi_blob_p obi_str_to_blob(char* value)
{
    Obi_blob_p value_b;
    size_t     str_length;
    int32_t    length;

    if (value == NULL)
    {
        obidebug(1, "\nError encoding a character string in a blob: NULL string");
        return NULL;
    }

    // The header stores lengths on 32 bits: refuse strings that would not fit
    // (the +1 below must not overflow either)
    str_length = strlen(value);
    if (str_length >= (size_t) INT32_MAX)
    {
        obidebug(1, "\nError encoding a character string in a blob: string too long");
        return NULL;
    }

    // Compute the number of bytes on which the value will be encoded
    length = (int32_t) str_length + 1;    // +1 to store \0 at the end (makes retrieving faster)

    value_b = obi_blob(value, ELEMENT_SIZE_STR, length, length);
    if (value_b == NULL)
    {
        obidebug(1, "\nError encoding a character string in a blob");
        return NULL;
    }

    return value_b;
}
/**
 * @brief Converts a blob to a character string.
 *
 * No copy is made: the returned pointer refers to the bytes stored inside
 * the blob itself.
 *
 * @param value_b The blob to convert.
 *
 * @returns A pointer to the character string contained in the blob.
 */
const char* obi_blob_to_str(Obi_blob_p value_b)
{
    // Explicit conversion: the blob stores byte_t elements, the caller gets chars
    return (const char*) value_b->value;
}
/**
 * @brief Converts a DNA sequence to a blob.
 *
 * The sequence is encoded on 2 bits per nucleotide when only_ATGC() accepts it,
 * and on 4 bits per nucleotide otherwise.
 *
 * @warning The blob must be freed by the caller.
 *
 * @param seq The DNA sequence to convert.
 *
 * @returns A pointer to the blob created.
 * @retval NULL if seq is NULL, too long to be described by an int32_t length,
 *         or if an error occurred while encoding or building the blob.
 */
Obi_blob_p obi_seq_to_blob(char* seq)
{
    Obi_blob_p value_b;
    size_t     raw_length;
    int32_t    seq_length;
    int32_t    length_encoded_seq;    // length of the encoded sequence in bytes
    uint8_t    element_size;
    byte_t*    encoded_seq;

    if (seq == NULL)
    {
        obidebug(1, "\nError encoding a DNA sequence in a blob: NULL sequence");
        return NULL;
    }

    // The header stores lengths on 32 bits: refuse sequences that would not fit
    raw_length = strlen(seq);
    if (raw_length > (size_t) INT32_MAX)
    {
        obidebug(1, "\nError encoding a DNA sequence in a blob: sequence too long");
        return NULL;
    }
    seq_length = (int32_t) raw_length;

    // Check if just ATGC and encode accordingly.
    // The encoded length is the ceiling of seq_length / (nucleotides per byte),
    // computed with integers so that no floating point (nor <math.h>) is needed.
    if (only_ATGC(seq))
    {
        element_size = ELEMENT_SIZE_SEQ_2;
        length_encoded_seq = seq_length / 4 + ((seq_length % 4) != 0);
        encoded_seq = encode_seq_on_2_bits(seq, seq_length);
    }
    else
    {
        element_size = ELEMENT_SIZE_SEQ_4;
        length_encoded_seq = seq_length / 2 + ((seq_length % 2) != 0);
        encoded_seq = encode_seq_on_4_bits(seq, seq_length);
    }

    if (encoded_seq == NULL)
        return NULL;

    value_b = obi_blob(encoded_seq, element_size, length_encoded_seq, seq_length);

    // The blob holds its own copy of the encoded bytes
    free(encoded_seq);

    return value_b;
}
/**
 * @brief Converts a blob to a DNA sequence.
 *
 * Dispatches on the element size stored in the header: 2 selects the 2-bit
 * decoder, any other value selects the 4-bit decoder.
 *
 * @param value_b The blob to convert.
 *
 * @returns The value returned by the selected decoding function.
 */
const char* obi_blob_to_seq(Obi_blob_p value_b)
{
    const int32_t decoded_length = value_b->length_decoded_value;

    // Anything that is not encoded on 2 bits is handled as 4 bits
    if (value_b->element_size != 2)
        return decode_seq_on_4_bits(value_b->value, decoded_length);

    return decode_seq_on_2_bits(value_b->value, decoded_length);
}
// TODO same for int

108
src/obiblob.h Normal file
View File

@ -0,0 +1,108 @@
/****************************************************************************
* Obiblob header file *
****************************************************************************/
/**
* @file obiblob.h
* @author Celine Mercier
* @date November 18th 2015
* @brief Header file for handling Obi_blob structures.
*/
#ifndef OBIBLOB_H_
#define OBIBLOB_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include "obitypes.h"
#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string.
*/
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits.
*/
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits.
*/
/**
* @brief Blob structure: a small header followed by the encoded value.
*
* The encoded bytes are stored in the trailing flexible array member, so a
* blob is built as one allocation of sizeof(Obi_blob_t) + length_encoded_value
* bytes.
*
* NOTE(review): element_size is a single byte followed by an int32_t, so the
* compiler presumably inserts padding after it; any code reading the structure
* as raw bytes also reads that padding -- confirm it is always initialised.
*/
typedef struct Obi_blob {
uint8_t element_size; /**< Size in bits of one element from the value.
*/
int32_t length_encoded_value; /**< Length in bytes of the encoded value.
*/
int32_t length_decoded_value; /**< Length in bytes of the decoded value
* (includes the terminating NUL for character strings,
* number of nucleotides for DNA sequences).
*/
byte_t value[]; /**< Encoded value.
*/
} Obi_blob_t, *Obi_blob_p;
/**
* @brief Converts a character string to a blob.
*
* The terminating NUL character is stored along with the string, so the
* encoded length is strlen(value) + 1.
*
* @warning The blob must be freed by the caller.
*
* @param value The character string to convert.
*
* @returns A pointer to the blob created.
* @retval NULL if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p obi_str_to_blob(char* value);
/**
* @brief Converts a blob to a character string.
*
* No copy is made: the returned pointer refers to the bytes stored inside
* the blob, so it must not be used once the blob is gone.
*
* @param value_b The blob to convert.
*
* @returns A pointer to the character string contained in the blob.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const char* obi_blob_to_str(Obi_blob_p value_b);
/**
* @brief Converts a DNA sequence to a blob.
*
* The sequence is encoded on 2 bits per nucleotide when it only contains
* A, T, G and C, and on 4 bits per nucleotide otherwise.
*
* @warning The blob must be freed by the caller.
*
* @param seq The DNA sequence to convert.
*
* @returns A pointer to the blob created.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p obi_seq_to_blob(char* seq);
/**
* @brief Converts a blob to a DNA sequence.
*
* The decoding function (2 bits or 4 bits per nucleotide) is chosen from the
* element size stored in the blob header.
*
* NOTE(review): the result comes straight from the decode_seq_on_*_bits()
* functions; presumably it is allocated by them and owned by the caller --
* confirm who is expected to free it.
*
* @param value_b The blob to convert.
*
* @returns A pointer to the DNA sequence contained in the blob.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const char* obi_blob_to_seq(Obi_blob_p value_b); // TODO move to encode source files
#endif /* OBIBLOB_H_ */

View File

@ -33,8 +33,8 @@
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value) int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value)
{ {
Obi_byte_array_p value_b; Obi_blob_p value_b;
index_t idx; index_t idx;
// Check that the line number is not greater than the maximum allowed // Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT) if (line_nb >= MAXIMUM_LINE_COUNT)
@ -57,7 +57,7 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
(column->header)->lines_used = line_nb+1; (column->header)->lines_used = line_nb+1;
// Encode the value on a byte array with a header // TODO make function // Encode the value on a byte array with a header // TODO make function
value_b = obi_seq_to_obibytes(value); value_b = obi_seq_to_blob(value);
if (value_b == NULL) if (value_b == NULL)
return -1; return -1;
@ -115,7 +115,7 @@ int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c
const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{ {
index_t idx; index_t idx;
Obi_byte_array_p value_b; Obi_blob_p value_b;
if ((line_nb+1) > ((column->header)->line_count)) if ((line_nb+1) > ((column->header)->line_count))
{ {
@ -132,7 +132,7 @@ const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t l
value_b = obi_avl_group_get(column->avl, idx); value_b = obi_avl_group_get(column->avl, idx);
return obi_obibytes_to_seq(value_b); return obi_blob_to_seq(value_b);
} }

View File

@ -32,7 +32,7 @@
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value) int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value)
{ {
Obi_byte_array_p value_b; Obi_blob_p value_b;
index_t idx; index_t idx;
// Check that the line number is not greater than the maximum allowed // Check that the line number is not greater than the maximum allowed
@ -56,7 +56,7 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
(column->header)->lines_used = line_nb+1; (column->header)->lines_used = line_nb+1;
// Encode the value on a byte array with a header // Encode the value on a byte array with a header
value_b = obi_str_to_obibytes(value); value_b = obi_str_to_blob(value);
if (value_b == NULL) if (value_b == NULL)
return -1; return -1;
@ -84,7 +84,7 @@ int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c
return -1; return -1;
} }
if ((view->line_selection != NULL) || (!(column->writable))) if ((view->line_selection != NULL) || (!(column->writable))) // TODO why check here writable?
{ {
// Get the right line number // Get the right line number
if (column->writable) if (column->writable)
@ -114,7 +114,7 @@ int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c
const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{ {
index_t idx; index_t idx;
Obi_byte_array_p value_b; Obi_blob_p value_b;
if ((line_nb+1) > ((column->header)->line_count)) if ((line_nb+1) > ((column->header)->line_count))
{ {
@ -131,7 +131,7 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l
value_b = obi_avl_group_get(column->avl, idx); value_b = obi_avl_group_get(column->avl, idx);
return obi_obibytes_to_str(value_b); return obi_blob_to_str(value_b);
} }