Major update: New column type to store sequence qualities. Closes #41
This commit is contained in:
@ -14,6 +14,7 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "char_str_indexer.h"
|
||||
#include "obiblob.h"
|
||||
#include "obiblob_indexer.h"
|
||||
#include "obidebug.h"
|
||||
@ -25,24 +26,16 @@
|
||||
|
||||
Obi_blob_p obi_str_to_blob(const char* value)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
int32_t length;
|
||||
int32_t length;
|
||||
|
||||
// Compute the number of bytes on which the value will be encoded
|
||||
length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster)
|
||||
|
||||
value_b = obi_blob((byte_t*)value, ELEMENT_SIZE_STR, length, length);
|
||||
if (value_b == NULL)
|
||||
{
|
||||
obidebug(1, "\nError encoding a character string in a blob");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return value_b;
|
||||
return obi_blob((byte_t*)value, ELEMENT_SIZE_STR, length, length);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Returns the character string stored in a blob.
 *
 * The pointer returned is the `value` member of the blob itself, not a copy,
 * hence the const qualifier: the caller must not modify or free it.
 */
const char* obi_blob_to_str(Obi_blob_p value_b)
{
	return value_b->value;
}
|
||||
@ -67,7 +60,7 @@ index_t obi_index_char_str(Obi_indexer_p indexer, const char* value)
|
||||
}
|
||||
|
||||
|
||||
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx)
|
||||
const char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
|
||||
|
@ -35,7 +35,7 @@
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_str_to_blob(const char* value);
|
||||
|
||||
|
||||
/**
|
||||
@ -80,7 +80,7 @@ index_t obi_index_char_str(Obi_indexer_p indexer, const char* value);
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx);
|
||||
|
||||
|
||||
#endif /* CHAR_STR_INDEXER_H_ */
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "dna_seq_indexer.h"
|
||||
#include "obiblob.h"
|
||||
#include "obiblob_indexer.h"
|
||||
#include "obidebug.h"
|
||||
|
@ -23,6 +23,8 @@
|
||||
|
||||
#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string.
|
||||
*/
|
||||
#define ELEMENT_SIZE_UINT8 (8) /**< The size of an element from a value of type uint8_t.
|
||||
*/
|
||||
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits.
|
||||
*/
|
||||
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits.
|
||||
|
@ -6,7 +6,7 @@
|
||||
 * @file obidmscolumn_qual.c
|
||||
* @author Celine Mercier
|
||||
* @date May 4th 2016
|
||||
* @brief Functions handling OBIColumns containing data in the form of indices referring to sequence quality arrays.
|
||||
* @brief Functions handling OBIColumns containing data in the form of indices referring to sequence qualities.
|
||||
*/
|
||||
|
||||
|
||||
@ -14,9 +14,10 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "obidmscolumn_qual.h"
|
||||
#include "obidmscolumn.h"
|
||||
#include "obitypes.h"
|
||||
#include "obidmscolumn_str.c"
|
||||
#include "uint8_indexer.h"
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
@ -26,38 +27,83 @@
|
||||
**********************************************************************/
|
||||
|
||||
/*
 * Sets a sequence quality given in the character string format.
 *
 * Each character of `value` is converted to an integer score by subtracting
 * QUALITY_ASCII_BASE, and the resulting uint8_t array is stored with
 * obi_column_set_obiqual_int_with_elt_idx().
 *
 * Returns the value returned by obi_column_set_obiqual_int_with_elt_idx(),
 * or -1 if `value` is NULL or if the temporary array can not be allocated.
 */
int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value)
{
	uint8_t* int_value;
	int      int_value_length;
	int      i;
	int      ret_value;

	if (value == NULL)
		return -1;

	int_value_length = strlen(value);

	// Always ask for at least one byte: malloc(0) is allowed to return NULL,
	// which would be indistinguishable from an allocation failure
	int_value = (uint8_t*) malloc((int_value_length + 1) * sizeof(uint8_t));
	if (int_value == NULL)
		return -1;

	// Convert in uint8_t array to index in that format
	for (i=0; i<int_value_length; i++)
		int_value[i] = ((uint8_t)(value[i])) - QUALITY_ASCII_BASE;

	ret_value = obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, int_value, int_value_length);

	// The temporary array is only needed for the call above
	free(int_value);

	return ret_value;
}
|
||||
|
||||
|
||||
/*
 * Sets a sequence quality given in the integer array format.
 *
 * The array is added to the column's indexer with obi_index_uint8() and the index
 * obtained is written in the column's data at position (line_nb, element_idx).
 * If the indexing fails with OBI_READ_ONLY_INDEXER_ERROR, the indexer is cloned under
 * the name built by obi_build_indexer_name() and the indexing is tried once more.
 *
 * Returns 0 on success, -1 if an error occurred.
 */
int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length)
{
	index_t       idx;
	char*         new_indexer_name;
	Obi_indexer_p new_indexer;

	if (obi_column_prepare_to_set_value(column, line_nb) < 0)
		return -1;

	// Add the value in the indexer
	idx = obi_index_uint8(column->indexer, value, value_length);
	if (idx == -1)	// An error occurred
	{
		// A read-only indexer is the only error that can be recovered from
		if (obi_errno != OBI_READ_ONLY_INDEXER_ERROR)
			return -1;

		// If the error is that the indexer is read-only, clone it
		new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
		if (new_indexer_name == NULL)
			return -1;

		// Keep the current indexer attached to the column until the clone is known to be usable
		// (assumes obi_clone_indexer() returns NULL on failure -- TODO confirm)
		new_indexer = obi_clone_indexer(column->indexer, new_indexer_name);	// TODO Need to lock this somehow?
		if (new_indexer == NULL)
			return -1;
		column->indexer = new_indexer;
		// NOTE(review): who owns new_indexer_name is not visible from here; it is
		// deliberately not freed in case the indexer keeps the pointer -- confirm

		// Add the value in the new indexer
		idx = obi_index_uint8(column->indexer, value, value_length);
		if (idx == -1)
			return -1;
	}

	// Add the value's index in the column
	*(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = idx;

	return 0;
}
|
||||
|
||||
|
||||
/*
 * Recovers a sequence quality in the character string format.
 *
 * The integer scores are read with obi_column_get_obiqual_int_with_elt_idx() and each
 * score is converted to a character by adding QUALITY_ASCII_BASE.
 *
 * Returns a '\0'-terminated string allocated with malloc() that the caller has to free,
 * or OBIQual_char_NA if the integer getter returned OBIQual_int_NA or if the allocation failed.
 * NOTE(review): OBIQual_char_NA is a string literal and must not be freed, so callers need
 * a way to tell both cases apart -- to be discussed.
 */
char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
	char*          value;
	const uint8_t* int_value;
	int            int_value_length = 0;	// stays 0 if the getter returns without setting it
	int            i;

	int_value = obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, &int_value_length);

	// No integer array to convert: reading it or trusting its length would be invalid
	if (int_value == OBIQual_int_NA)
		return OBIQual_char_NA;

	value = (char*) malloc((int_value_length + 1) * sizeof(char));
	if (value == NULL)
		return OBIQual_char_NA;

	// Encode int quality to char quality
	for (i=0; i<int_value_length; i++)
		value[i] = (char)(int_value[i] + QUALITY_ASCII_BASE);

	value[i] = '\0';

	return value;
}
|
||||
|
||||
|
||||
uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) // TODO const? (mapped)
|
||||
const uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length)
|
||||
{
|
||||
index_t idx;
|
||||
|
||||
@ -70,7 +116,7 @@ uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t
|
||||
if (idx == OBIIdx_NA)
|
||||
return OBIQual_int_NA;
|
||||
|
||||
return obi_retrieve_quality_int(column->indexer, idx);
|
||||
return obi_retrieve_uint8(column->indexer, idx, value_length);
|
||||
}
|
||||
|
||||
|
||||
@ -84,13 +130,13 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li
|
||||
}
|
||||
|
||||
|
||||
/*
 * Sets a sequence quality given in the integer array format, the element being
 * designated by its name.
 *
 * The name is resolved with obi_column_get_element_index_from_name() and the work
 * is delegated to obi_column_set_obiqual_int_with_elt_idx().
 *
 * Returns -1 if the element name is not found, otherwise the value returned by
 * obi_column_set_obiqual_int_with_elt_idx().
 */
int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length)
{
	index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
	if (element_idx == OBIIdx_NA)
		return -1;

	return obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value, value_length);
}
|
||||
|
||||
|
||||
@ -104,12 +150,12 @@ char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t
|
||||
}
|
||||
|
||||
|
||||
/*
 * Recovers a sequence quality in the integer array format, the element being
 * designated by its name.
 *
 * The name is resolved with obi_column_get_element_index_from_name() and the work
 * is delegated to obi_column_get_obiqual_int_with_elt_idx(), which receives `value_length`.
 *
 * Returns OBIQual_int_NA if the element name is not found, otherwise the value returned by
 * obi_column_get_obiqual_int_with_elt_idx().
 */
const uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length)
{
	index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
	if (element_idx == OBIIdx_NA)
		return OBIQual_int_NA;

	return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, value_length);
}
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
 * @file obidmscolumn_qual.h
|
||||
* @author Celine Mercier
|
||||
* @date May 4th 2016
|
||||
* @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to sequence quality arrays.
|
||||
* @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to sequence qualities.
|
||||
*/
|
||||
|
||||
|
||||
@ -22,6 +22,12 @@
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
#define QUALITY_ASCII_BASE (33) /**< The ASCII base of sequence quality.
|
||||
* Used to convert sequence qualities from characters to integers
|
||||
* and the other way around.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to sequence qualities handled by an indexer, and using the index of the element in the column's line.
|
||||
@ -56,7 +62,8 @@ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t lin
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be set.
|
||||
* @param element_idx The index of the element that should be set in the line.
|
||||
* @param value The value that should be set, in the integer format.
|
||||
* @param value The value that should be set, in the integer array format.
|
||||
* @param value_length The length of the integer array.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
@ -65,7 +72,7 @@ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t lin
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length);
|
||||
|
||||
|
||||
/**
|
||||
@ -79,7 +86,7 @@ int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line
|
||||
* @param element_idx The index of the element that should be recovered in the line.
|
||||
*
|
||||
* @returns The recovered value, in the character string format.
|
||||
* @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
 * @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
@ -96,14 +103,15 @@ char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t l
|
||||
* @param column A pointer as returned by obi_create_column().
|
||||
* @param line_nb The number of the line where the value should be recovered.
|
||||
* @param element_idx The index of the element that should be recovered in the line.
|
||||
* @param value_length A pointer on an integer to store the length of the integer array recovered.
|
||||
*
|
||||
* @returns The recovered value, in the integer format.
|
||||
* @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
* @returns The recovered value, in the integer array format.
|
||||
* @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length);
|
||||
|
||||
|
||||
/**
|
||||
@ -133,7 +141,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li
|
||||
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||
 * to sequence qualities handled by an indexer, and using the name of the element in the column's line.
|
||||
*
|
||||
* This function is for quality scores in the integer format.
|
||||
* This function is for quality scores in the integer array format.
|
||||
*
|
||||
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||
*
|
||||
@ -141,6 +149,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li
|
||||
* @param line_nb The number of the line where the value should be set.
|
||||
* @param element_name The name of the element that should be set in the line.
|
||||
 * @param value The value that should be set, in the integer array format.
|
||||
* @param value_length The length of the integer array.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
@ -149,7 +158,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length);
|
||||
|
||||
|
||||
/**
|
||||
@ -163,7 +172,7 @@ int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t lin
|
||||
* @param element_name The name of the element that should be recovered in the line.
|
||||
*
|
||||
* @returns The recovered value, in the character string format.
|
||||
* @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
 * @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
@ -175,19 +184,20 @@ char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t
|
||||
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||
 * to sequence qualities handled by an indexer, and using the name of the element in the column's line.
|
||||
*
|
||||
* This function returns quality scores in the integer format.
|
||||
* This function returns quality scores in the integer array format.
|
||||
*
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be recovered.
|
||||
* @param element_name The name of the element that should be recovered in the line.
|
||||
* @param value_length A pointer on an integer to store the length of the integer array recovered.
|
||||
*
|
||||
* @returns The recovered value, in the integer format.
|
||||
* @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
* @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length);
|
||||
|
||||
|
||||
#endif /* OBIDMSCOLUMN_QUAL_H_ */
|
||||
|
@ -29,17 +29,6 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
|
||||
index_t idx;
|
||||
char* new_indexer_name;
|
||||
|
||||
// TODO
|
||||
// size_t i;
|
||||
// uint8_t q;
|
||||
// for (i=0;i<=strlen(value);i++)
|
||||
// {
|
||||
// if ()
|
||||
// q = ((uint8_t) value[i]) - 33;
|
||||
// fprintf(stderr, "\n%c == %u", value[i], q);
|
||||
// }
|
||||
|
||||
|
||||
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
|
||||
return -1;
|
||||
|
||||
|
@ -25,7 +25,7 @@
|
||||
#define OBISeq_NA ("\0") /**< NA value for the type OBI_SEQ */ // TODO discuss
|
||||
#define OBIStr_NA ("\0") /**< NA value for the type OBI_STR */ // TODO discuss
|
||||
#define OBIQual_char_NA ("\0") /**< NA value for the type OBI_QUAL if the quality is in character string format */ // TODO test and discuss
|
||||
#define OBIQual_int_NA (NULL) /**< NA value for the type OBI_QUAL if the quality is in integer format */ // TODO test and discuss
|
||||
|
||||
|
||||
/**
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "obidmscolumn_char.h"
|
||||
#include "obidmscolumn_float.h"
|
||||
#include "obidmscolumn_int.h"
|
||||
#include "obidmscolumn_qual.h"
|
||||
#include "obidmscolumn_seq.h"
|
||||
#include "obidmscolumn_str.h"
|
||||
#include "obierrno.h"
|
||||
@ -600,8 +601,6 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
|
||||
if (view_to_clone == NULL)
|
||||
{
|
||||
// TODO Add quality column?
|
||||
|
||||
// Adding sequence column
|
||||
if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", "Nucleotide sequences", true) < 0)
|
||||
{
|
||||
@ -620,6 +619,12 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
|
||||
return NULL;
|
||||
}
|
||||
// Adding quality column
|
||||
if (obi_view_add_column(view, QUALITY_COLUMN, -1, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", "Sequence qualities", true) < 0)
|
||||
{
|
||||
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return view;
|
||||
@ -1490,6 +1495,78 @@ obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_colu
|
||||
/****************************************/
|
||||
|
||||
|
||||
/*********** FOR QUAL COLUMNS ***********/
|
||||
|
||||
/*
 * Sets a sequence quality (character string format) in a column, in the context of a view.
 * prepare_to_set_value_in_column() receives the addresses of `column` and `line_nb`
 * before the value is set with obi_column_set_obiqual_char_with_elt_idx().
 */
int obi_column_set_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value)
{
	const int prepared = prepare_to_set_value_in_column(view, &column, &line_nb);

	return (prepared < 0) ? -1 : obi_column_set_obiqual_char_with_elt_idx(column, line_nb, element_idx, value);
}
|
||||
|
||||
|
||||
/*
 * Sets a sequence quality (integer array format) in a column, in the context of a view.
 * prepare_to_set_value_in_column() receives the addresses of `column` and `line_nb`
 * before the value is set with obi_column_set_obiqual_int_with_elt_idx().
 */
int obi_column_set_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length)
{
	const int prepared = prepare_to_set_value_in_column(view, &column, &line_nb);

	return (prepared < 0) ? -1 : obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value, value_length);
}
|
||||
|
||||
|
||||
/*
 * Recovers a sequence quality (character string format) from a column, in the context of a view.
 * prepare_to_get_value_from_column() receives the address of `line_nb` before the value
 * is read with obi_column_get_obiqual_char_with_elt_idx().
 */
char* obi_column_get_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
	const int prepared = prepare_to_get_value_from_column(view, &line_nb);

	if (prepared >= 0)
		return obi_column_get_obiqual_char_with_elt_idx(column, line_nb, element_idx);

	return OBIQual_char_NA;
}
|
||||
|
||||
|
||||
/*
 * Recovers a sequence quality (integer array format) from a column, in the context of a view.
 * prepare_to_get_value_from_column() receives the address of `line_nb` before the value
 * is read with obi_column_get_obiqual_int_with_elt_idx(), which receives `value_length`.
 */
const uint8_t* obi_column_get_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length)
{
	const int prepared = prepare_to_get_value_from_column(view, &line_nb);

	if (prepared >= 0)
		return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, value_length);

	return OBIQual_int_NA;
}
|
||||
|
||||
|
||||
/*
 * Same as obi_column_set_obiqual_char_with_elt_idx_in_view(), the element being designated
 * by its name, resolved with obi_column_get_element_index_from_name().
 */
int obi_column_set_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value)
{
	index_t elt_position;

	elt_position = obi_column_get_element_index_from_name(column, element_name);
	if (elt_position != OBIIdx_NA)
		return obi_column_set_obiqual_char_with_elt_idx_in_view(view, column, line_nb, elt_position, value);

	// Unknown element name
	return -1;
}
|
||||
|
||||
|
||||
/*
 * Same as obi_column_set_obiqual_int_with_elt_idx_in_view(), the element being designated
 * by its name, resolved with obi_column_get_element_index_from_name().
 */
int obi_column_set_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length)
{
	index_t elt_position;

	elt_position = obi_column_get_element_index_from_name(column, element_name);
	if (elt_position != OBIIdx_NA)
		return obi_column_set_obiqual_int_with_elt_idx_in_view(view, column, line_nb, elt_position, value, value_length);

	// Unknown element name
	return -1;
}
|
||||
|
||||
|
||||
/*
 * Same as obi_column_get_obiqual_char_with_elt_idx_in_view(), the element being designated
 * by its name, resolved with obi_column_get_element_index_from_name().
 */
char* obi_column_get_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
	index_t elt_position;

	elt_position = obi_column_get_element_index_from_name(column, element_name);
	if (elt_position != OBIIdx_NA)
		return obi_column_get_obiqual_char_with_elt_idx_in_view(view, column, line_nb, elt_position);

	// Unknown element name
	return OBIQual_char_NA;
}
|
||||
|
||||
|
||||
/*
 * Same as obi_column_get_obiqual_int_with_elt_idx_in_view(), the element being designated
 * by its name, resolved with obi_column_get_element_index_from_name().
 */
const uint8_t* obi_column_get_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length)
{
	index_t elt_position;

	elt_position = obi_column_get_element_index_from_name(column, element_name);
	if (elt_position != OBIIdx_NA)
		return obi_column_get_obiqual_int_with_elt_idx_in_view(view, column, line_nb, elt_position, value_length);

	// Unknown element name
	return OBIQual_int_NA;
}
|
||||
|
||||
/****************************************/
|
||||
|
||||
|
||||
/*********** FOR SEQ COLUMNS ***********/
|
||||
|
||||
int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value)
|
||||
|
194
src/obiview.h
194
src/obiview.h
@ -17,6 +17,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <time.h>
|
||||
#include <math.h>
|
||||
@ -50,6 +51,9 @@
|
||||
#define DEFINITION_COLUMN "DEFINITION" /**< The name of the column containing the sequence definitions
|
||||
* in NUC_SEQS_VIEW views.
|
||||
*/
|
||||
#define QUALITY_COLUMN "QUALITY" /**< The name of the column containing the sequence qualities
|
||||
* in NUC_SEQS_VIEW views.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@ -215,6 +219,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
|
||||
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
|
||||
* - ID_COLUMN where sequence identifiers are stored
|
||||
* - DEFINITION_COLUMN where sequence definitions are stored
|
||||
* - QUALITY_COLUMN where sequence qualities are stored
|
||||
*
|
||||
* @param dms A pointer on the OBIDMS.
|
||||
* @param view_name The unique name of the view.
|
||||
@ -246,6 +251,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
|
||||
* - ID_COLUMN where sequence identifiers are stored
|
||||
* - DEFINITION_COLUMN where sequence definitions are stored
|
||||
* - QUALITY_COLUMN where sequence qualities are stored
|
||||
*
|
||||
* @param dms A pointer on the OBIDMS.
|
||||
* @param view_name The unique name of the new view.
|
||||
@ -803,6 +809,194 @@ int obi_column_set_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p
|
||||
obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to sequence qualities handled by an indexer, and using the index of the element in the column's line,
|
||||
* in the context of a view.
|
||||
*
|
||||
* This function is for qualities in the character string format.
|
||||
*
|
||||
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||
*
|
||||
* @param view A pointer on the opened view.
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be set.
|
||||
* @param element_idx The index of the element that should be set in the line.
|
||||
* @param value The value that should be set, in the character string format.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_column_set_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to sequence qualities handled by an indexer, and using the index of the element in the column's line,
|
||||
* in the context of a view.
|
||||
*
|
||||
 * This function is for qualities in the integer array format.
|
||||
*
|
||||
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||
*
|
||||
* @param view A pointer on the opened view.
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be set.
|
||||
* @param element_idx The index of the element that should be set in the line.
|
||||
* @param value The value that should be set, in the integer array format.
|
||||
* @param value_length The length of the integer array.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_column_set_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to sequence qualities handled by an indexer, and using the index of the element in the column's line,
|
||||
* in the context of a view.
|
||||
*
|
||||
* This function returns quality scores in the character string format.
|
||||
*
|
||||
* @param view A pointer on the opened view.
|
||||
* @param column A pointer as returned by obi_create_column().
|
||||
* @param line_nb The number of the line where the value should be recovered.
|
||||
* @param element_idx The index of the element that should be recovered in the line.
|
||||
*
|
||||
* @returns The recovered value, in the character string format.
|
||||
 * @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_column_get_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to sequence qualities handled by an indexer, and using the index of the element in the column's line,
|
||||
* in the context of a view.
|
||||
*
|
||||
 * This function returns quality scores in the integer array format.
|
||||
*
|
||||
* @param view A pointer on the opened view.
|
||||
* @param column A pointer as returned by obi_create_column().
|
||||
* @param line_nb The number of the line where the value should be recovered.
|
||||
* @param element_idx The index of the element that should be recovered in the line.
|
||||
* @param value_length A pointer on an integer to store the length of the integer array recovered.
|
||||
*
|
||||
* @returns The recovered value, in the integer array format.
|
||||
* @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const uint8_t* obi_column_get_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||
 * to sequence qualities handled by an indexer, and using the name of the element in the column's line,
|
||||
* in the context of a view.
|
||||
*
|
||||
* This function is for quality scores in the character string format.
|
||||
*
|
||||
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||
*
|
||||
* @param view A pointer on the opened view.
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be set.
|
||||
* @param element_name The name of the element that should be set in the line.
|
||||
* @param value The value that should be set, in the character string format.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_column_set_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||
 * to sequence qualities handled by an indexer, and using the name of the element in the column's line,
|
||||
* in the context of a view.
|
||||
*
|
||||
* This function is for quality scores in the integer array format.
|
||||
*
|
||||
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||
*
|
||||
* @param view A pointer on the opened view.
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be set.
|
||||
* @param element_name The name of the element that should be set in the line.
|
||||
 * @param value The value that should be set, in the integer array format.
|
||||
* @param value_length The length of the integer array.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_column_set_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to sequence qualities handled by an indexer, and using the name of the element in the column's line,
|
||||
* in the context of a view.
|
||||
*
|
||||
* This function returns quality scores in the character string format.
|
||||
*
|
||||
* @param view A pointer on the opened view.
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be recovered.
|
||||
* @param element_name The name of the element that should be recovered in the line.
|
||||
*
|
||||
* @returns The recovered value, in the character string format.
|
||||
* @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_column_get_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||
* to sequence qualities handled by an indexer, and using the name of the element in the column's line,
|
||||
* in the context of a view.
|
||||
*
|
||||
* This function returns quality scores in the integer array format.
|
||||
*
|
||||
* @param view A pointer on the opened view.
|
||||
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||
* @param line_nb The number of the line where the value should be recovered.
|
||||
* @param element_name The name of the element that should be recovered in the line.
|
||||
* @param value_length A pointer to an integer in which the length of the recovered integer array is stored.
|
||||
*
|
||||
* @returns The recovered value, in the integer format.
|
||||
* @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set.
|
||||
*
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const uint8_t* obi_column_get_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets a value in an OBIDMS column containing data with the type OBI_SEQ, using the index of the element in the line,
|
||||
* in the context of a view.
|
||||
|
@ -1,19 +1,21 @@
|
||||
/****************************************************************************
|
||||
* Sequence quality scores indexing functions *
|
||||
* Uint8 indexing functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file quality_indexer.c
|
||||
* @file uint8_indexer.c
|
||||
* @author Celine Mercier
|
||||
* @date May 4th 2016
|
||||
* @brief Functions handling the indexing and retrieval of sequence quality scores.
|
||||
* @brief Functions handling the indexing and retrieval of uint8 arrays.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "uint8_indexer.h"
|
||||
#include "obiblob.h"
|
||||
#include "obiblob_indexer.h"
|
||||
#include "obidebug.h"
|
||||
@ -23,60 +25,25 @@
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
Obi_blob_p obi_uint8_to_blob(const char* quality)
|
||||
Obi_blob_p obi_uint8_to_blob(const uint8_t* value, int value_length)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
int32_t length_encoded_seq; // length of the encoded sequence in bytes
|
||||
int32_t seq_length;
|
||||
byte_t* encoded_seq;
|
||||
|
||||
seq_length = strlen(seq);
|
||||
|
||||
// Check if just ATGC and encode accordingly
|
||||
if (only_ATGC(seq))
|
||||
{
|
||||
// Compute the length (in bytes) of the encoded sequence
|
||||
length_encoded_seq = ceil((double) seq_length / (double) 4.0);
|
||||
// Encode
|
||||
encoded_seq = encode_seq_on_2_bits(seq, seq_length);
|
||||
if (encoded_seq == NULL)
|
||||
return NULL;
|
||||
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compute the length (in bytes) of the encoded sequence
|
||||
length_encoded_seq = ceil((double) seq_length / (double) 2.0);
|
||||
// Encode
|
||||
encoded_seq = encode_seq_on_4_bits(seq, seq_length);
|
||||
if (encoded_seq == NULL)
|
||||
return NULL;
|
||||
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length);
|
||||
}
|
||||
|
||||
free(encoded_seq);
|
||||
|
||||
return value_b;
|
||||
return obi_blob((byte_t*)value, ELEMENT_SIZE_UINT8, value_length, value_length);
|
||||
}
|
||||
|
||||
|
||||
char* obi_blob_to_quality_char(Obi_blob_p value_b)
|
||||
const uint8_t* obi_blob_to_uint8(Obi_blob_p value_b)
|
||||
{
|
||||
// Decode
|
||||
if (value_b->element_size == 2)
|
||||
return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
|
||||
else
|
||||
return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);
|
||||
return ((uint8_t*) (value_b->value));
|
||||
}
|
||||
|
||||
|
||||
index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value)
|
||||
index_t obi_index_uint8(Obi_indexer_p indexer, const uint8_t* value, int value_length)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
index_t idx;
|
||||
|
||||
// Encode value
|
||||
value_b = obi_seq_to_blob(value);
|
||||
value_b = obi_uint8_to_blob(value, value_length);
|
||||
if (value_b == NULL)
|
||||
return -1;
|
||||
|
||||
@ -89,7 +56,7 @@ index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value)
|
||||
}
|
||||
|
||||
|
||||
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
|
||||
const uint8_t* obi_retrieve_uint8(Obi_indexer_p indexer, index_t idx, int* value_length)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
|
||||
@ -97,6 +64,7 @@ char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
|
||||
value_b = obi_indexer_get(indexer, idx);
|
||||
|
||||
// Store the length of the array and return the uint8 array
|
||||
return obi_blob_to_seq(value_b);
|
||||
*value_length = value_b->length_decoded_value;
|
||||
return obi_blob_to_uint8(value_b);
|
||||
}
|
||||
|
||||
|
@ -1,17 +1,17 @@
|
||||
/****************************************************************************
|
||||
* DNA sequence indexer header file *
|
||||
* uint8 indexer header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file dna_seq_indexer.h
|
||||
* @file uint8_indexer.h
|
||||
* @author Celine Mercier
|
||||
* @date April 12th 2016
|
||||
* @brief Header file for the functions handling the indexing of DNA sequences.
|
||||
* @date May 4th 2016
|
||||
* @brief Header file for the functions handling the indexing of uint8 arrays.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef DNA_SEQ_INDEXER_H_
|
||||
#define DNA_SEQ_INDEXER_H_
|
||||
#ifndef UINT8_INDEXER_H_
|
||||
#define UINT8_INDEXER_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -24,64 +24,69 @@
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a DNA sequence to a blob.
|
||||
* @brief Converts an uint8 array to a blob.
|
||||
*
|
||||
* @warning The blob must be freed by the caller.
|
||||
*
|
||||
* @param value The DNA sequence to convert.
|
||||
* @param value The uint8 array to convert.
|
||||
* @param value_length The length of the uint8 array to convert.
|
||||
*
|
||||
* @returns A pointer to the blob created.
|
||||
* @returns A pointer to the blob created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_seq_to_blob(const char* seq);
|
||||
Obi_blob_p obi_uint8_to_blob(const uint8_t* value, int value_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a blob to a DNA sequence.
|
||||
* @brief Converts a blob to an uint8 array.
|
||||
*
|
||||
* @warning The array returned is mapped: it is the blob's own value field, not a copy, so it must not be freed by the caller.
|
||||
*
|
||||
* @param value_b The blob to convert.
|
||||
*
|
||||
* @returns A pointer to the DNA sequence contained in the blob.
|
||||
* @returns A pointer to the uint8 array contained in the blob.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_blob_to_seq(Obi_blob_p value_b);
|
||||
const uint8_t* obi_blob_to_uint8(Obi_blob_p value_b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Stores a DNA sequence in an indexer and returns the index.
|
||||
* @brief Stores an uint8 array in an indexer and returns the index.
|
||||
*
|
||||
* @param indexer The indexer structure.
|
||||
* @param value The DNA sequence to index.
|
||||
* @param value The uint8 array to index.
|
||||
* @param value_length The length of the uint8 array to index.
|
||||
*
|
||||
* @returns The index referring to the stored DNA sequence in the indexer.
|
||||
* @returns The index referring to the stored uint8 array in the indexer.
|
||||
*
|
||||
* @since April 2016
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value);
|
||||
index_t obi_index_uint8(Obi_indexer_p indexer, const uint8_t* value, int value_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Retrieves a DNA sequence from an indexer.
|
||||
* @brief Retrieves an uint8 array from an indexer.
|
||||
*
|
||||
* @warning The DNA sequence returned must be freed by the caller.
|
||||
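* @warning The array returned is mapped: it is the value field of the blob stored in the indexer, not a copy, so it must not be freed by the caller.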
|
||||
*
|
||||
* @param indexer The indexer structure.
|
||||
* @param idx The index referring to the DNA sequence to retrieve in the indexer.
|
||||
* @param idx The index referring to the uint8 array to retrieve in the indexer.
|
||||
* @param value_length A pointer to an integer in which the length of the retrieved array is stored.
|
||||
*
|
||||
* @returns A pointer on the DNA sequence.
|
||||
* @returns A pointer on the uint8 array.
|
||||
*
|
||||
* @since April 2016
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx);
|
||||
const uint8_t* obi_retrieve_uint8(Obi_indexer_p indexer, index_t idx, int* value_length);
|
||||
|
||||
|
||||
#endif /* DNA_SEQ_INDEXER_H_ */
|
||||
#endif /* UINT8_INDEXER_H_ */
|
||||
|
||||
|
Reference in New Issue
Block a user