Renamed "Obi_byte_arrays" to "Obiblobs" and moved Obiblob functions to
separate obiblob.c and obiblob.h files
This commit is contained in:
105
src/encode.c
105
src/encode.c
@ -349,111 +349,6 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds a byte array structure from an already encoded value.
 *
 * Allocates the structure followed by room for the encoded value, fills in
 * the header fields and copies the encoded value right after the header.
 * The result is heap-allocated and must be released with free().
 *
 * @param encoded_value A pointer to the encoded value to copy.
 * @param element_size The number of bits on which each element is encoded.
 * @param length_encoded_value The length in bytes of the encoded value.
 * @param length_decoded_value The length in bytes of the decoded value.
 *
 * @returns A pointer to the byte array created.
 * @retval NULL if the arguments are invalid or if the allocation failed.
 */
Obi_byte_array_p obi_byte_array(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value)
{
	Obi_byte_array_p byte_array;

	// A negative length would wrap around once converted to size_t below,
	// and copying from a NULL source is undefined behaviour
	if ((length_encoded_value < 0) || ((encoded_value == NULL) && (length_encoded_value > 0)))
	{
		obidebug(1, "\nError creating a byte array: invalid encoded value or length");
		return NULL;
	}

	// Allocate the memory for the byte array structure.
	// calloc rather than malloc: the structure may contain padding bytes
	// between its fields, and the whole structure is hashed byte-wise
	// (crc64 and bloom filter over byte_array_sizeof() bytes), so the
	// padding must hold a deterministic value.
	byte_array = (Obi_byte_array_p) calloc(1, sizeof(Obi_byte_array_t) + (size_t) length_encoded_value);
	if (byte_array == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a byte array");
		return NULL;
	}

	// Store the number of bits on which each element is encoded
	byte_array->element_size = element_size;

	// Store the length (in bytes) of the encoded value
	byte_array->length_encoded_value = length_encoded_value;

	// Store the initial length (in bytes) of the decoded value
	byte_array->length_decoded_value = length_decoded_value;

	// Store the encoded value
	if (length_encoded_value > 0)
		memcpy(byte_array->value, encoded_value, (size_t) length_encoded_value);

	return byte_array;
}
|
||||
|
||||
|
||||
Obi_byte_array_p obi_str_to_obibytes(char* value)
|
||||
{
|
||||
Obi_byte_array_p value_b;
|
||||
int32_t length;
|
||||
|
||||
// Compute the number of bytes on which the value will be encoded
|
||||
length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster)
|
||||
|
||||
value_b = obi_byte_array(value, ELEMENT_SIZE_STR, length, length);
|
||||
if (value_b == NULL)
|
||||
{
|
||||
obidebug(1, "\nError encoding a character string in a byte array");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return value_b;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Converts a byte array to a character string.
 *
 * No copy is made: the returned pointer refers to the value stored inside
 * the byte array itself (stored with its terminating '\0' by
 * obi_str_to_obibytes()).
 *
 * @param value_b The byte array to convert.
 *
 * @returns A pointer to the character string contained in the byte array.
 */
const char* obi_obibytes_to_str(Obi_byte_array_p value_b)
{
	// The debugging trace that printed every retrieved value on stderr
	// has been removed: reading a value must not produce output.
	return value_b->value;
}
|
||||
|
||||
|
||||
Obi_byte_array_p obi_seq_to_obibytes(char* seq)
|
||||
{
|
||||
Obi_byte_array_p value_b;
|
||||
int32_t length_encoded_seq; // length of the encoded sequence in bytes
|
||||
int32_t seq_length;
|
||||
byte_t* encoded_seq;
|
||||
|
||||
seq_length = strlen(seq);
|
||||
|
||||
// Check if just ATGC and encode accordingly
|
||||
if (only_ATGC(seq))
|
||||
{
|
||||
// Compute the length (in bytes) of the encoded sequence
|
||||
length_encoded_seq = ceil((double) seq_length / (double) 4.0);
|
||||
// Encode
|
||||
encoded_seq = encode_seq_on_2_bits(seq, seq_length);
|
||||
if (encoded_seq == NULL)
|
||||
return NULL;
|
||||
value_b = obi_byte_array(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compute the length (in bytes) of the encoded sequence
|
||||
length_encoded_seq = ceil((double) seq_length / (double) 2.0);
|
||||
// Encode
|
||||
encoded_seq = encode_seq_on_4_bits(seq, seq_length);
|
||||
if (encoded_seq == NULL)
|
||||
return NULL;
|
||||
value_b = obi_byte_array(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length);
|
||||
}
|
||||
|
||||
free(encoded_seq);
|
||||
|
||||
return value_b;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Converts a byte array to a DNA sequence.
 *
 * The decoder is chosen from the element size stored in the header:
 * 2 selects the 2-bit decoder, any other value the 4-bit decoder.
 * The result of the decoder is returned unchanged.
 * NOTE(review): ownership of the decoded string is decided by the decoding
 * functions, which are not visible here - confirm who frees it.
 *
 * @param value_b The byte array to convert.
 *
 * @returns The DNA sequence returned by the decoding function.
 */
const char* obi_obibytes_to_seq(Obi_byte_array_p value_b)
{
	const int encoded_on_2_bits = (value_b->element_size == 2);

	if (!encoded_on_2_bits)
		return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);

	return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
}
|
||||
|
||||
|
||||
// TODO same for int
|
||||
|
||||
|
||||
///////////////////// FOR DEBUGGING ///////////////////////////
|
||||
//NOTE: The first byte is printed the first (at the left-most).
|
||||
|
||||
|
80
src/encode.h
80
src/encode.h
@ -26,27 +26,6 @@
|
||||
*/
|
||||
#define NUC_MASK_4B 0xF /**< Binary: 1111 to use when decoding 4 bits sequences
|
||||
*/
|
||||
#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string.
|
||||
*/
|
||||
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits.
|
||||
*/
|
||||
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Byte array structure.
|
||||
*/
|
||||
typedef struct Obi_byte_array {
|
||||
uint8_t element_size; /**< Size in bits of one element from the value.
|
||||
*/
|
||||
int32_t length_encoded_value; /**< Length in bytes of the encoded value.
|
||||
*/
|
||||
int32_t length_decoded_value; /**< Length in bytes of the decoded value.
|
||||
*/
|
||||
byte_t value[]; /**< Encoded value.
|
||||
*/
|
||||
} Obi_byte_array_t, *Obi_byte_array_p;
|
||||
|
||||
|
||||
/**
|
||||
@ -201,65 +180,6 @@ byte_t* encode_seq_on_4_bits(char* seq, int32_t length);
|
||||
char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a character string to a byte array with a header.
|
||||
*
|
||||
* @warning The byte array must be freed by the caller.
|
||||
*
|
||||
* @param value The character string to convert.
|
||||
*
|
||||
* @returns A pointer to the byte array created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_byte_array_p obi_str_to_obibytes(char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a byte array to a character string.
|
||||
*
|
||||
* @param value_b The byte array to convert.
|
||||
*
|
||||
* @returns A pointer to the character string contained in the byte array.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_obibytes_to_str(Obi_byte_array_p value_b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a DNA sequence to a byte array with a header.
|
||||
*
|
||||
* @warning The byte array must be freed by the caller.
|
||||
*
|
||||
* @param seq The DNA sequence to convert.
|
||||
*
|
||||
* @returns A pointer to the byte array created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_byte_array_p obi_seq_to_obibytes(char* seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a byte array to a DNA sequence.
|
||||
*
|
||||
* @param value_b The byte array to convert.
|
||||
*
|
||||
* @returns A pointer to the DNA sequence contained in the byte array.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_obibytes_to_seq(Obi_byte_array_p value_b); // TODO move to encode source files
|
||||
|
||||
|
||||
////////// FOR DEBUGGING ///////////
|
||||
|
||||
// little endian
|
||||
|
67
src/obiavl.c
67
src/obiavl.c
@ -22,6 +22,7 @@
|
||||
#include "bloom.h"
|
||||
#include "crc64.h"
|
||||
#include "obiavl.h"
|
||||
#include "obiblob.h"
|
||||
#include "obierrno.h"
|
||||
#include "obitypes.h"
|
||||
#include "obidebug.h"
|
||||
@ -270,7 +271,7 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group);
|
||||
* The function checks a bloom filter. No false negatives, possible false positives.
|
||||
*
|
||||
* @param avl A pointer to the AVL tree structure.
|
||||
* @param value A pointer to the byte array structure.
|
||||
* @param value A pointer to the blob structure.
|
||||
*
|
||||
* @retval 0 if the value is definitely not already stored in the AVL tree.
|
||||
* @retval 1 if the value might already be stored in the AVL tree.
|
||||
@ -278,17 +279,17 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group);
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int maybe_in_avl(OBIDMS_avl_p avl, Obi_byte_array_p value);
|
||||
int maybe_in_avl(OBIDMS_avl_p avl, Obi_blob_p value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function comparing two byte arrays.
|
||||
* @brief Internal function comparing two blobs.
|
||||
*
|
||||
* The encoding is compared first, then the length of the
|
||||
* values, then the values themselves.
|
||||
*
|
||||
* @param value_1 A pointer to the first byte array structure.
|
||||
* @param value_2 A pointer to the second byte array structure.
|
||||
* @param value_1 A pointer to the first blob structure.
|
||||
* @param value_2 A pointer to the second blob structure.
|
||||
*
|
||||
* @returns A value < 0 if value_1 < value_2,
|
||||
* a value > 0 if value_1 > value_2,
|
||||
@ -297,27 +298,27 @@ int maybe_in_avl(OBIDMS_avl_p avl, Obi_byte_array_p value);
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int byte_array_compare(Obi_byte_array_p value_1, Obi_byte_array_p value_2);
|
||||
int blob_compare(Obi_blob_p value_1, Obi_blob_p value_2);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function calculating the size in bytes of a byte array.
|
||||
* @brief Internal function calculating the size in bytes of a blob.
|
||||
*
|
||||
* @param value A pointer to the byte array structure.
|
||||
* @param value A pointer to the blob structure.
|
||||
*
|
||||
* @returns The size of the byte array in bytes.
|
||||
* @returns The size of the blob in bytes.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int byte_array_sizeof(Obi_byte_array_p value);
|
||||
int blob_sizeof(Obi_blob_p value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function storing a value (byte array) in the data array referred to by an AVL tree.
|
||||
* @brief Internal function storing a value (blob) in the data array referred to by an AVL tree.
|
||||
*
|
||||
* @param avl A pointer to the AVL tree structure.
|
||||
* @param value A pointer to the value (byte array structure).
|
||||
* @param value A pointer to the value (blob structure).
|
||||
*
|
||||
* @returns The index of the stored value.
|
||||
* @retval -1 if an error occurred.
|
||||
@ -325,7 +326,7 @@ int byte_array_sizeof(Obi_byte_array_p value);
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_byte_array_p value);
|
||||
index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_blob_p value);
|
||||
|
||||
|
||||
/**
|
||||
@ -979,13 +980,13 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group)
|
||||
}
|
||||
|
||||
|
||||
int maybe_in_avl(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
int maybe_in_avl(OBIDMS_avl_p avl, Obi_blob_p value)
|
||||
{
|
||||
return (bloom_check(&((avl->header)->bloom_filter), value, byte_array_sizeof(value)));
|
||||
return (bloom_check(&((avl->header)->bloom_filter), value, blob_sizeof(value)));
|
||||
}
|
||||
|
||||
|
||||
int byte_array_compare(Obi_byte_array_p value_1, Obi_byte_array_p value_2)
|
||||
int blob_compare(Obi_blob_p value_1, Obi_blob_p value_2)
|
||||
{
|
||||
int comp;
|
||||
int32_t b;
|
||||
@ -1013,13 +1014,13 @@ int byte_array_compare(Obi_byte_array_p value_1, Obi_byte_array_p value_2)
|
||||
}
|
||||
|
||||
|
||||
int byte_array_sizeof(Obi_byte_array_p value)
|
||||
int blob_sizeof(Obi_blob_p value)
|
||||
{
|
||||
return (sizeof(Obi_byte_array_t) + (value->length_encoded_value));
|
||||
return (sizeof(Obi_blob_t) + (value->length_encoded_value));
|
||||
}
|
||||
|
||||
|
||||
index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_blob_p value)
|
||||
{
|
||||
index_t value_idx;
|
||||
int value_size;
|
||||
@ -1027,7 +1028,7 @@ index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
value_idx = ((avl->data)->header)->data_size_used;
|
||||
|
||||
// Grow the data if needed
|
||||
value_size = byte_array_sizeof(value);
|
||||
value_size = blob_sizeof(value);
|
||||
while (((avl->data)->header)->data_size_max < (value_idx + value_size))
|
||||
{
|
||||
if (grow_avl_data(avl->data) < 0)
|
||||
@ -2047,20 +2048,20 @@ int obi_close_avl_group(OBIDMS_avl_group_p avl_group)
|
||||
}
|
||||
|
||||
|
||||
Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t idx)
|
||||
Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t idx)
|
||||
{
|
||||
return ((Obi_byte_array_p)(((avl->data)->data)+idx));
|
||||
return ((Obi_blob_p)(((avl->data)->data)+idx));
|
||||
}
|
||||
|
||||
|
||||
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value)
|
||||
{
|
||||
AVL_node_p node_to_add = NULL;
|
||||
AVL_node_p current_node;
|
||||
index_t next, parent;
|
||||
index_t value_data_idx;
|
||||
index_t node_idx;
|
||||
Obi_byte_array_p to_compare;
|
||||
Obi_blob_p to_compare;
|
||||
int comp;
|
||||
int n;
|
||||
int depth;
|
||||
@ -2068,7 +2069,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
|
||||
n = 0;
|
||||
depth = 0;
|
||||
crc = crc64((byte_t*)value, byte_array_sizeof(value));
|
||||
crc = crc64((byte_t*)value, blob_sizeof(value));
|
||||
|
||||
// Check if first node
|
||||
if (!((avl->header)->nb_items))
|
||||
@ -2115,7 +2116,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
if (comp == 0)
|
||||
{ // check if really same value
|
||||
to_compare = obi_avl_get(avl, current_node->value);
|
||||
comp = byte_array_compare(to_compare, value);
|
||||
comp = blob_compare(to_compare, value);
|
||||
}
|
||||
|
||||
if (comp > 0)
|
||||
@ -2189,15 +2190,15 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
|
||||
|
||||
// Find if a value is already in an AVL tree
|
||||
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value)
|
||||
{
|
||||
int comp;
|
||||
index_t next;
|
||||
Obi_byte_array_p to_compare;
|
||||
Obi_blob_p to_compare;
|
||||
AVL_node_p current_node;
|
||||
uint64_t crc;
|
||||
|
||||
crc = crc64((byte_t*)value, byte_array_sizeof(value));
|
||||
crc = crc64((byte_t*)value, blob_sizeof(value));
|
||||
|
||||
next = (avl->header)->root_idx;
|
||||
while (next != -1)
|
||||
@ -2210,7 +2211,7 @@ index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
if (comp == 0)
|
||||
{ // Check if really same value
|
||||
to_compare = obi_avl_get(avl, current_node->value);
|
||||
comp = byte_array_compare(to_compare, value);
|
||||
comp = blob_compare(to_compare, value);
|
||||
}
|
||||
|
||||
if (comp > 0)
|
||||
@ -2229,7 +2230,7 @@ index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value)
|
||||
}
|
||||
|
||||
|
||||
Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx)
|
||||
Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx)
|
||||
{
|
||||
int32_t avl_idx;
|
||||
index_t idx_in_avl;
|
||||
@ -2241,7 +2242,7 @@ Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx)
|
||||
}
|
||||
|
||||
|
||||
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value)
|
||||
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value)
|
||||
{
|
||||
int32_t index_in_avl;
|
||||
index_t index_with_avl;
|
||||
@ -2286,7 +2287,7 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value)
|
||||
}
|
||||
|
||||
// Add in the current AVL
|
||||
bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, byte_array_sizeof(value));
|
||||
bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, blob_sizeof(value));
|
||||
index_in_avl = (int32_t) obi_avl_add((avl_group->sub_avls)[avl_group->current_avl_idx], value);
|
||||
|
||||
// Build the index containing the AVL index
|
||||
|
43
src/obiavl.h
43
src/obiavl.h
@ -6,7 +6,7 @@
|
||||
* @file obiavl.h
|
||||
* @author Celine Mercier
|
||||
* @date December 3rd 2015
|
||||
* @brief Header file for handling AVL trees for storing and retrieving byte arrays (i.e. coding for character strings).
|
||||
* @brief Header file for handling AVL trees for storing and retrieving blobs (i.e. coding for character strings).
|
||||
*/
|
||||
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "obidms.h"
|
||||
#include "obiblob.h"
|
||||
#include "obitypes.h"
|
||||
#include "bloom.h"
|
||||
#include "utils.h"
|
||||
@ -314,28 +315,28 @@ int obi_close_avl_group(OBIDMS_avl_group_p avl_group);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value (byte array) in an AVL tree.
|
||||
* @brief Recovers a value (blob) in an AVL tree.
|
||||
*
|
||||
* @warning The byte array recovered must be decoded to get the original value.
|
||||
* @warning The blob recovered must be decoded to get the original value.
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param index The index of the value in the data array.
|
||||
*
|
||||
* @returns A pointer to the byte array recovered.
|
||||
* @returns A pointer to the blob recovered.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
|
||||
Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds a value (byte array) in an AVL tree NOT checking first if it is already in it. // TODO to discuss
|
||||
* @brief Adds a value (blob) in an AVL tree NOT checking first if it is already in it. // TODO to discuss
|
||||
*
|
||||
* @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t).
|
||||
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param value The byte array to add in the AVL tree.
|
||||
* @param value The blob to add in the AVL tree.
|
||||
*
|
||||
* @returns The index of the value newly added in the AVL tree.
|
||||
* @retval -1 if an error occurred.
|
||||
@ -343,16 +344,16 @@ Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value);
|
||||
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Finds a value (byte array) in an AVL tree.
|
||||
* @brief Finds a value (blob) in an AVL tree.
|
||||
*
|
||||
* @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t).
|
||||
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param value The byte array to add in the AVL tree.
|
||||
* @param value The blob to find in the AVL tree.
|
||||
*
|
||||
* @returns The data index of the value.
|
||||
* @retval -1 if the value is not in the tree.
|
||||
@ -360,32 +361,32 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value);
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value);
|
||||
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value (byte array) in an AVL tree.
|
||||
* @brief Recovers a value (blob) in an AVL tree.
|
||||
*
|
||||
* @warning The byte array recovered must be decoded to get the original value.
|
||||
* @warning The blob recovered must be decoded to get the original value.
|
||||
*
|
||||
* @param avl_group A pointer to the AVL tree group.
|
||||
* @param idx The index of the value in the data array.
|
||||
*
|
||||
* @returns A pointer to the byte array recovered.
|
||||
* @returns A pointer to the blob recovered.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
|
||||
Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds a value (byte array) in an AVL tree group, checking if it is already in it.
|
||||
* @brief Adds a value (blob) in an AVL tree group, checking if it is already in it.
|
||||
*
|
||||
* @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t).
|
||||
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
|
||||
*
|
||||
* @param avl_group A pointer to the AVL tree group.
|
||||
* @param value The byte array to add in the AVL tree group.
|
||||
* @param value The blob to add in the AVL tree group.
|
||||
*
|
||||
* @returns The index of the value newly added in the AVL tree group.
|
||||
* @retval -1 if an error occurred.
|
||||
@ -393,7 +394,7 @@ Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value);
|
||||
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value);
|
||||
|
||||
|
||||
#endif /* OBIAVL_H_ */
|
||||
|
130
src/obiblob.c
Normal file
130
src/obiblob.c
Normal file
@ -0,0 +1,130 @@
|
||||
/****************************************************************************
|
||||
* Obiblob functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obiblob.c
|
||||
* @author Celine Mercier
|
||||
* @date April 11th 2016
|
||||
* @brief Functions handling Obiblob structures.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "obierrno.h"
|
||||
#include "obitypes.h" // For byte_t type
|
||||
#include "obidebug.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
// TODO: endianness problem?
|
||||
|
||||
|
||||
/**
 * Builds a blob from an already encoded value.
 *
 * Allocates a blob structure followed by room for the encoded value, fills
 * in the header fields and copies the encoded value right after the header.
 * The blob is heap-allocated and must be released with free().
 *
 * @param encoded_value A pointer to the encoded value to copy into the blob.
 * @param element_size The number of bits on which each element is encoded.
 * @param length_encoded_value The length in bytes of the encoded value.
 * @param length_decoded_value The length in bytes of the decoded value.
 *
 * @returns A pointer to the blob created.
 * @retval NULL if the arguments are invalid or if the allocation failed.
 */
Obi_blob_p obi_blob(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value)
{
	Obi_blob_p blob;

	// A negative length would wrap around once converted to size_t below,
	// and copying from a NULL source is undefined behaviour
	if ((length_encoded_value < 0) || ((encoded_value == NULL) && (length_encoded_value > 0)))
	{
		obidebug(1, "\nError creating a blob: invalid encoded value or length");
		return NULL;
	}

	// Allocate the memory for the blob structure.
	// calloc rather than malloc: the structure may contain padding bytes
	// between its fields, and blobs are hashed byte-wise over the whole
	// structure (crc64 and bloom filter over blob_sizeof() bytes), so the
	// padding must hold a deterministic value.
	blob = (Obi_blob_p) calloc(1, sizeof(Obi_blob_t) + (size_t) length_encoded_value);
	if (blob == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a blob");
		return NULL;
	}

	// Store the number of bits on which each element is encoded
	blob->element_size = element_size;

	// Store the length (in bytes) of the encoded value
	blob->length_encoded_value = length_encoded_value;

	// Store the initial length (in bytes) of the decoded value
	blob->length_decoded_value = length_decoded_value;

	// Store the encoded value
	if (length_encoded_value > 0)
		memcpy(blob->value, encoded_value, (size_t) length_encoded_value);

	return blob;
}
|
||||
|
||||
|
||||
Obi_blob_p obi_str_to_blob(char* value)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
int32_t length;
|
||||
|
||||
// Compute the number of bytes on which the value will be encoded
|
||||
length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster)
|
||||
|
||||
value_b = obi_blob(value, ELEMENT_SIZE_STR, length, length);
|
||||
if (value_b == NULL)
|
||||
{
|
||||
obidebug(1, "\nError encoding a character string in a blob");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return value_b;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Converts a blob to a character string.
 *
 * No copy is made: the returned pointer refers to the value stored inside
 * the blob itself (stored with its terminating '\0' by obi_str_to_blob()).
 *
 * @param value_b The blob to convert.
 *
 * @returns A pointer to the character string contained in the blob.
 */
const char* obi_blob_to_str(Obi_blob_p value_b)
{
	const char* stored_string = value_b->value;

	return stored_string;
}
|
||||
|
||||
|
||||
Obi_blob_p obi_seq_to_blob(char* seq)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
int32_t length_encoded_seq; // length of the encoded sequence in bytes
|
||||
int32_t seq_length;
|
||||
byte_t* encoded_seq;
|
||||
|
||||
seq_length = strlen(seq);
|
||||
|
||||
// Check if just ATGC and encode accordingly
|
||||
if (only_ATGC(seq))
|
||||
{
|
||||
// Compute the length (in bytes) of the encoded sequence
|
||||
length_encoded_seq = ceil((double) seq_length / (double) 4.0);
|
||||
// Encode
|
||||
encoded_seq = encode_seq_on_2_bits(seq, seq_length);
|
||||
if (encoded_seq == NULL)
|
||||
return NULL;
|
||||
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compute the length (in bytes) of the encoded sequence
|
||||
length_encoded_seq = ceil((double) seq_length / (double) 2.0);
|
||||
// Encode
|
||||
encoded_seq = encode_seq_on_4_bits(seq, seq_length);
|
||||
if (encoded_seq == NULL)
|
||||
return NULL;
|
||||
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length);
|
||||
}
|
||||
|
||||
free(encoded_seq);
|
||||
|
||||
return value_b;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Converts a blob to a DNA sequence.
 *
 * The decoder is chosen from the element size stored in the blob header:
 * 2 selects the 2-bit decoder, any other value the 4-bit decoder.
 * The result of the decoder is returned unchanged.
 * NOTE(review): ownership of the decoded string is decided by the decoding
 * functions, which are not visible here - confirm who frees it.
 *
 * @param value_b The blob to convert.
 *
 * @returns The DNA sequence returned by the decoding function.
 */
const char* obi_blob_to_seq(Obi_blob_p value_b)
{
	const int encoded_on_2_bits = (value_b->element_size == 2);

	if (!encoded_on_2_bits)
		return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);

	return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
}
|
||||
|
||||
|
||||
// TODO same for int
|
||||
|
108
src/obiblob.h
Normal file
108
src/obiblob.h
Normal file
@ -0,0 +1,108 @@
|
||||
/****************************************************************************
|
||||
* Obiblob header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obiblob.h
|
||||
* @author Celine Mercier
|
||||
* @date November 18th 2015
|
||||
* @brief Header file for handling Obi_blob structures.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OBIBLOB_H_
|
||||
#define OBIBLOB_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string.
|
||||
*/
|
||||
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits.
|
||||
*/
|
||||
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Blob structure.
|
||||
* TODO
|
||||
*/
|
||||
typedef struct Obi_blob {
|
||||
uint8_t element_size; /**< Size in bits of one element from the value.
|
||||
*/
|
||||
int32_t length_encoded_value; /**< Length in bytes of the encoded value.
|
||||
*/
|
||||
int32_t length_decoded_value; /**< Length in bytes of the decoded value.
|
||||
*/
|
||||
byte_t value[]; /**< Encoded value.
|
||||
*/
|
||||
} Obi_blob_t, *Obi_blob_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a character string to a blob.
|
||||
*
|
||||
* @warning The blob must be freed by the caller.
|
||||
*
|
||||
* @param value The character string to convert.
|
||||
*
|
||||
* @returns A pointer to the blob created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_str_to_blob(char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a blob to a character string.
|
||||
*
|
||||
* @param value_b The blob to convert.
|
||||
*
|
||||
* @returns A pointer to the character string contained in the blob.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_blob_to_str(Obi_blob_p value_b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a DNA sequence to a blob with a header.
|
||||
*
|
||||
* @warning The blob must be freed by the caller.
|
||||
*
|
||||
* @param seq The DNA sequence to convert.
|
||||
*
|
||||
* @returns A pointer to the blob created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_seq_to_blob(char* seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a blob to a DNA sequence.
|
||||
*
|
||||
* @param value_b The blob to convert.
|
||||
*
|
||||
* @returns A pointer to the DNA sequence contained in the blob.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_blob_to_seq(Obi_blob_p value_b); // TODO move to encode source files
|
||||
|
||||
|
||||
#endif /* OBIBLOB_H_ */
|
||||
|
@ -33,7 +33,7 @@
|
||||
|
||||
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value)
|
||||
{
|
||||
Obi_byte_array_p value_b;
|
||||
Obi_blob_p value_b;
|
||||
index_t idx;
|
||||
|
||||
// Check that the line number is not greater than the maximum allowed
|
||||
@ -57,7 +57,7 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
|
||||
(column->header)->lines_used = line_nb+1;
|
||||
|
||||
// Encode the value on a byte array with a header // TODO make function
|
||||
value_b = obi_seq_to_obibytes(value);
|
||||
value_b = obi_seq_to_blob(value);
|
||||
if (value_b == NULL)
|
||||
return -1;
|
||||
|
||||
@ -115,7 +115,7 @@ int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c
|
||||
const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
|
||||
{
|
||||
index_t idx;
|
||||
Obi_byte_array_p value_b;
|
||||
Obi_blob_p value_b;
|
||||
|
||||
if ((line_nb+1) > ((column->header)->line_count))
|
||||
{
|
||||
@ -132,7 +132,7 @@ const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t l
|
||||
|
||||
value_b = obi_avl_group_get(column->avl, idx);
|
||||
|
||||
return obi_obibytes_to_seq(value_b);
|
||||
return obi_blob_to_seq(value_b);
|
||||
}
|
||||
|
||||
|
||||
|
@ -32,7 +32,7 @@
|
||||
|
||||
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value)
|
||||
{
|
||||
Obi_byte_array_p value_b;
|
||||
Obi_blob_p value_b;
|
||||
index_t idx;
|
||||
|
||||
// Check that the line number is not greater than the maximum allowed
|
||||
@ -56,7 +56,7 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
|
||||
(column->header)->lines_used = line_nb+1;
|
||||
|
||||
// Encode the value on a byte array with a header
|
||||
value_b = obi_str_to_obibytes(value);
|
||||
value_b = obi_str_to_blob(value);
|
||||
if (value_b == NULL)
|
||||
return -1;
|
||||
|
||||
@ -84,7 +84,7 @@ int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((view->line_selection != NULL) || (!(column->writable)))
|
||||
if ((view->line_selection != NULL) || (!(column->writable))) // TODO why check here writable?
|
||||
{
|
||||
// Get the right line number
|
||||
if (column->writable)
|
||||
@ -114,7 +114,7 @@ int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c
|
||||
const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
|
||||
{
|
||||
index_t idx;
|
||||
Obi_byte_array_p value_b;
|
||||
Obi_blob_p value_b;
|
||||
|
||||
if ((line_nb+1) > ((column->header)->line_count))
|
||||
{
|
||||
@ -131,7 +131,7 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l
|
||||
|
||||
value_b = obi_avl_group_get(column->avl, idx);
|
||||
|
||||
return obi_obibytes_to_str(value_b);
|
||||
return obi_blob_to_str(value_b);
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user