diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c index 4e205e4..606ed13 100644 --- a/src/obidmscolumn.c +++ b/src/obidmscolumn.c @@ -587,7 +587,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, } // Build the indexer name if needed - if ((data_type == OBI_STR) || (data_type == OBI_SEQ)) + if ((data_type == OBI_STR) || (data_type == OBI_SEQ) || (data_type == OBI_QUAL)) { if (strcmp(indexer_name, "") == 0) { @@ -603,7 +603,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, } returned_data_type = data_type; - if ((data_type == OBI_STR) || (data_type == OBI_SEQ)) + if ((data_type == OBI_STR) || (data_type == OBI_SEQ) || (data_type == OBI_QUAL)) // stored data is indices referring to data stored elsewhere stored_data_type = OBI_IDX; else @@ -750,8 +750,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, if (comments != NULL) strncpy(header->comments, comments, COMMENTS_MAX_LENGTH); - // If the data type is OBI_STR or OBI_SEQ, the associated obi_indexer is opened or created - if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ)) + // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated obi_indexer is opened or created + if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ) || (returned_data_type == OBI_QUAL)) { new_column->indexer = obi_indexer(dms, final_indexer_name); if (new_column->indexer == NULL) @@ -900,8 +900,8 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, column->writable = false; - // If the data type is OBI_STR or OBI_SEQ, the associated indexer is opened - if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) + // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is opened + if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL)) { column->indexer = obi_open_indexer(dms, (column->header)->indexer_name); if 
(column->indexer == NULL) @@ -1028,8 +1028,8 @@ int obi_close_column(OBIDMS_column_p column) // Close column directory if it was the last column opened from that directory close_dir = obi_dms_is_column_name_in_list(column->dms, (column->header)->name); - // If the data type is OBI_STR or OBI_SEQ, the associated indexer is closed - if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) + // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed + if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL)) if (obi_close_indexer(column->indexer) < 0) ret_val = -1; @@ -1155,6 +1155,14 @@ int obi_enlarge_column(OBIDMS_column_p column) int column_file_descriptor; char* column_file_name; + // Check if the column is read-only + if (!(column->writable)) + { + obi_set_errno(OBICOL_UNKNOWN_ERROR); + obidebug(1, "\nError trying to enlarge a read-only column"); + return -1; + } + // Get the column file name column_file_name = build_column_file_name((column->header)->name, (column->header)->version); if (column_file_name == NULL) @@ -1274,6 +1282,12 @@ void obi_ini_to_NA_values(OBIDMS_column_p column, } break; + case OBI_QUAL: for (i=start;i<end;i++) + { + *(((index_t*) (column->data)) + i) = OBIIdx_NA; + } + break; + case OBI_STR: for (i=start;i<end;i++) { *(((index_t*) (column->data)) + i) = OBIIdx_NA; diff --git a/src/obidmscolumn_qual.c b/src/obidmscolumn_qual.c new file mode 100644 index 0000000..307bfbf --- /dev/null +++ b/src/obidmscolumn_qual.c @@ -0,0 +1,115 @@ +/**************************************************************************** + * OBIDMS_column_qual functions * + ****************************************************************************/ + +/** + * @file obidmscolumn_qual.c + * @author Celine Mercier + * @date May 4th 2016 + * @brief Functions handling OBIColumns containing data in the form of indices referring to sequence quality arrays. 
+ */ + + +#include +#include +#include + +#include "obidmscolumn.h" +#include "obitypes.h" +#include "obidmscolumn_str.c" + + +/********************************************************************** + * + * D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S + * + **********************************************************************/ + +int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value) +{ // TODO discuss + return obi_column_set_obistr_with_elt_idx(column, line_nb, element_idx, value); +} + + +int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value) +{ + char* value_char; + + // Transform the int array into a char array + // Length?? + //value_char = ; + + obi_column_set_obiqual_char_with_elt_idx(column, line_nb, element_idx, value_char) + + return 0; +} + + +char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) +{ // TODO discuss + char* value; + + value = obi_column_get_obistr_with_elt_idx(column, line_nb, element_idx); + if (strcmp(value, OBIStr_NA) == 0) + return OBIQual_char_NA; + + return value; +} + + +uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) // TODO const? 
(mapped) +{ + index_t idx; + + if (obi_column_prepare_to_get_value(column, line_nb) < 0) + return OBIQual_int_NA; + + idx = *(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx); + + // Check NA + if (idx == OBIIdx_NA) + return OBIQual_int_NA; + + return obi_retrieve_quality_int(column->indexer, idx); +} + + +int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value) +{ + index_t element_idx = obi_column_get_element_index_from_name(column, element_name); + if (element_idx == OBIIdx_NA) + return -1; + + return obi_column_set_obiqual_char_with_elt_idx(column, line_nb, element_idx, value); +} + + +int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, uint8_t* value) +{ + index_t element_idx = obi_column_get_element_index_from_name(column, element_name); + if (element_idx == OBIIdx_NA) + return -1; + + return obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value); +} + + +char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name) +{ + index_t element_idx = obi_column_get_element_index_from_name(column, element_name); + if (element_idx == OBIIdx_NA) + return OBIQual_char_NA; + + return obi_column_get_obiqual_char_with_elt_idx(column, line_nb, element_idx); +} + + +uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name) // TODO const? 
(mapped) +{ + index_t element_idx = obi_column_get_element_index_from_name(column, element_name); + if (element_idx == OBIIdx_NA) + return OBIQual_int_NA; + + return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx); +} + diff --git a/src/obidmscolumn_qual.h b/src/obidmscolumn_qual.h new file mode 100644 index 0000000..d8eec88 --- /dev/null +++ b/src/obidmscolumn_qual.h @@ -0,0 +1,194 @@ +/**************************************************************************** + * OBIDMS_column_qual header file * + ****************************************************************************/ + +/** + * @file obidsmcolumn_qual.h + * @author Celine Mercier + * @date May 4th 2016 + * @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to sequence quality arrays. + */ + + +#ifndef OBIDMSCOLUMN_QUAL_H_ +#define OBIDMSCOLUMN_QUAL_H_ + + +#include +#include +#include + +#include "obidmscolumn.h" +#include "obitypes.h" + + +/** + * @brief Sets a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line. + * + * This function is for qualities in the character string format. + * + * @warning Pointers returned by obi_open_column() don't allow writing. + * + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be set. + * @param element_idx The index of the element that should be set in the line. + * @param value The value that should be set, in the character string format. + * + * @returns An integer value indicating the success of the operation. + * @retval 0 on success. + * @retval -1 if an error occurred. 
+ * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value); + + +/** + * @brief Sets a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line. + * + * This function is for qualities in the integer format. + * + * @warning Pointers returned by obi_open_column() don't allow writing. + * + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be set. + * @param element_idx The index of the element that should be set in the line. + * @param value The value that should be set, in the integer format. + * + * @returns An integer value indicating the success of the operation. + * @retval 0 on success. + * @retval -1 if an error occurred. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, uint8_t* value); + + +/** + * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line. + * + * This function returns quality scores in the character string format. + * + * @param column A pointer as returned by obi_create_column(). + * @param line_nb The number of the line where the value should be recovered. + * @param element_idx The index of the element that should be recovered in the line. + * + * @returns The recovered value, in the character string format. + * @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set. 
+ * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx); + + +/** + * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line. + * + * This function returns quality scores in the integer format. + * + * @param column A pointer as returned by obi_create_column(). + * @param line_nb The number of the line where the value should be recovered. + * @param element_idx The index of the element that should be recovered in the line. + * + * @returns The recovered value, in the integer format. + * @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx); + + +/** + * @brief Sets a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the name of the element in the column's line. + * + * This function is for quality scores in the character string format. + * + * @warning Pointers returned by obi_open_column() don't allow writing. + * + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be set. + * @param element_name The name of the element that should be set in the line. + * @param value The value that should be set, in the character string format. + * + * @returns An integer value indicating the success of the operation. + * @retval 0 on success. + * @retval -1 if an error occurred. 
+ * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value); + + +/** + * @brief Sets a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the name of the element in the column's line. + * + * This function is for quality scores in the integer format. + * + * @warning Pointers returned by obi_open_column() don't allow writing. + * + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be set. + * @param element_name The name of the element that should be set in the line. + * @param value The value that should be set, in the integer format. + * + * @returns An integer value indicating the success of the operation. + * @retval 0 on success. + * @retval -1 if an error occurred. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, uint8_t* value); + + +/** + * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the name of the element in the column's line. + * + * This function returns quality scores in the character string format. + * + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be recovered. + * @param element_name The name of the element that should be recovered in the line. + * + * @returns The recovered value, in the character string format. + * @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set. 
+ * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name); + + +/** + * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the name of the element in the column's line. + * + * This function returns quality scores in the integer format. + * + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be recovered. + * @param element_name The name of the element that should be recovered in the line. + * + * @returns The recovered value, in the integer format. + * @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name); + + +#endif /* OBIDMSCOLUMN_QUAL_H_ */ + diff --git a/src/obidmscolumn_str.c b/src/obidmscolumn_str.c index a28e217..74d1d8e 100644 --- a/src/obidmscolumn_str.c +++ b/src/obidmscolumn_str.c @@ -28,6 +28,17 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, { index_t idx; +// TODO +// size_t i; +// uint8_t q; +// for (i=0;i<=strlen(value);i++) +// { +// if () +// q = ((uint8_t) value[i]) - 33; +// fprintf(stderr, "\n%c == %u", value[i], q); +// } + + if (obi_column_prepare_to_set_value(column, line_nb) < 0) return -1; diff --git a/src/obitypes.c b/src/obitypes.c index 15d1c62..3f7e21f 100644 --- a/src/obitypes.c +++ b/src/obitypes.c @@ -40,6 +40,9 @@ size_t obi_sizeof(OBIType_t type) case OBI_CHAR: size = sizeof(obichar_t); break; + case OBI_QUAL: size = sizeof(index_t); + break; + case OBI_STR: size = sizeof(index_t); break; @@ -96,6 +99,9 @@ char* 
name_data_type(int data_type) case OBI_CHAR: name = strdup("OBI_CHAR"); break; + case OBI_QUAL: name = strdup("OBI_QUAL"); + break; + case OBI_STR: name = strdup("OBI_STR"); break; diff --git a/src/obitypes.h b/src/obitypes.h index 023c640..01d46ab 100644 --- a/src/obitypes.h +++ b/src/obitypes.h @@ -24,6 +24,8 @@ #define OBIChar_NA (0) /**< NA value for the type OBI_CHAR */ // TODO not sure about this one as it can be impossible to distinguish from uninitialized values #define OBISeq_NA ("\0") /**< NA value for the type OBI_SEQ */ // TODO discuss #define OBIStr_NA ("\0") /**< NA value for the type OBI_STR */ // TODO discuss +#define OBIQual_char_NA ("\0") /**< NA value for the type OBI_QUAL if the quality is in character string format */ // TODO test and discuss +#define OBIQual_int_NA ("\0") /**< NA value for the type OBI_QUAL if the quality is in integer format */ // TODO test and discuss /** @@ -45,6 +47,7 @@ typedef enum OBIType { OBI_FLOAT, /**< a floating value (C type : double) */ OBI_BOOL, /**< a boolean true/false value, see obibool_t enum */ OBI_CHAR, /**< a character (C type : char) */ + OBI_QUAL, /**< an index in a data structure (C type : int64_t) referring to a quality score array */ OBI_STR, /**< an index in a data structure (C type : int64_t) referring to a character string */ OBI_SEQ, /**< an index in a data structure (C type : int64_t) referring to a DNA sequence */ OBI_IDX /**< an index referring to a line in another column (C type : int64_t) */ diff --git a/src/uint8_indexer.c b/src/uint8_indexer.c new file mode 100644 index 0000000..3e0657d --- /dev/null +++ b/src/uint8_indexer.c @@ -0,0 +1,102 @@ +/**************************************************************************** + * Sequence quality scores indexing functions * + ****************************************************************************/ + +/** + * @file quality_indexer.c + * @author Celine Mercier + * @date May 4th 2016 + * @brief Functions handling the indexing and retrieval of 
sequence quality scores. + */ + + +#include +#include +#include + +#include "obiblob.h" +#include "obiblob_indexer.h" +#include "obidebug.h" +#include "obitypes.h" + + +#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) + + +Obi_blob_p obi_uint8_to_blob(const char* quality) +{ + Obi_blob_p value_b; + int32_t length_encoded_seq; // length of the encoded sequence in bytes + int32_t seq_length; + byte_t* encoded_seq; + + seq_length = strlen(seq); + + // Check if just ATGC and encode accordingly + if (only_ATGC(seq)) + { + // Compute the length (in bytes) of the encoded sequence + length_encoded_seq = ceil((double) seq_length / (double) 4.0); + // Encode + encoded_seq = encode_seq_on_2_bits(seq, seq_length); + if (encoded_seq == NULL) + return NULL; + value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length); + } + else + { + // Compute the length (in bytes) of the encoded sequence + length_encoded_seq = ceil((double) seq_length / (double) 2.0); + // Encode + encoded_seq = encode_seq_on_4_bits(seq, seq_length); + if (encoded_seq == NULL) + return NULL; + value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length); + } + + free(encoded_seq); + + return value_b; +} + + +char* obi_blob_to_quality_char(Obi_blob_p value_b) +{ + // Decode + if (value_b->element_size == 2) + return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value); + else + return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value); +} + + +index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value) +{ + Obi_blob_p value_b; + index_t idx; + + // Encode value + value_b = obi_seq_to_blob(value); + if (value_b == NULL) + return -1; + + // Add in the indexer + idx = obi_indexer_add(indexer, value_b); + + free(value_b); + + return idx; +} + + +char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx) +{ + Obi_blob_p value_b; + + // Get encoded value + value_b = 
obi_indexer_get(indexer, idx); + + // Return decoded sequence + return obi_blob_to_seq(value_b); +} + diff --git a/src/uint8_indexer.h b/src/uint8_indexer.h new file mode 100644 index 0000000..750ff5c --- /dev/null +++ b/src/uint8_indexer.h @@ -0,0 +1,87 @@ +/**************************************************************************** + * DNA sequence indexer header file * + ****************************************************************************/ + +/** + * @file dna_seq_indexer.h + * @author Celine Mercier + * @date April 12th 2016 + * @brief Header file for the functions handling the indexing of DNA sequences. + */ + + +#ifndef DNA_SEQ_INDEXER_H_ +#define DNA_SEQ_INDEXER_H_ + + +#include +#include + +#include "obidms.h" +#include "obitypes.h" +#include "obiblob.h" +#include "obiblob_indexer.h" + + +/** + * @brief Converts a DNA sequence to a blob. + * + * @warning The blob must be freed by the caller. + * + * @param value The DNA sequence to convert. + * + * @returns A pointer to the blob created. + * @retval NULL if an error occurred. + * + * @since November 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +Obi_blob_p obi_seq_to_blob(const char* seq); + + +/** + * @brief Converts a blob to a DNA sequence. + * + * @param value_b The blob to convert. + * + * @returns A pointer to the DNA sequence contained in the blob. + * @retval NULL if an error occurred. + * + * @since November 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +char* obi_blob_to_seq(Obi_blob_p value_b); + + +/** + * @brief Stores a DNA sequence in an indexer and returns the index. + * + * @param indexer The indexer structure. + * @param value The DNA sequence to index. + * + * @returns The index referring to the stored DNA sequence in the indexer. 
+ * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value); + + +/** + * @brief Retrieves a DNA sequence from an indexer. + * + * @warning The DNA sequence returned must be freed by the caller. + * + * @param indexer The indexer structure. + * @param idx The index referring to the DNA sequence to retrieve in the indexer. + * + * @returns A pointer on the DNA sequence. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx); + + +#endif /* DNA_SEQ_INDEXER_H_ */ +