Major update: New column type to store sequence qualities. Closes #41

This commit is contained in:
Celine Mercier
2016-05-20 16:45:22 +02:00
parent ffff91e76c
commit 3b59043ea8
33 changed files with 962 additions and 267 deletions

View File

@ -1,19 +1,21 @@
/****************************************************************************
* Sequence quality scores indexing functions *
* Uint8 indexing functions *
****************************************************************************/
/**
* @file quality_indexer.c
* @file uint8_indexer.c
* @author Celine Mercier
* @date May 4th 2016
* @brief Functions handling the indexing and retrieval of sequence quality scores.
* @brief Functions handling the indexing and retrieval of uint8 arrays.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include "uint8_indexer.h"
#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidebug.h"
@ -23,60 +25,25 @@
#define DEBUG_LEVEL 0 // TODO: has to be defined somewhere else (Cython compilation flag?)
/*
 * NOTE(review): this span reads like a rendered diff whose +/- markers were
 * stripped, so two versions of one function are interleaved below. Presumably
 * the `const char* quality` signature and the 2-bit / 4-bit sequence-encoding
 * body are the superseded text, while the `const uint8_t* value, int
 * value_length` signature and the final single-line return are the
 * replacement -- confirm against the repository before relying on this.
 *
 * Replacement version, as visible here: builds a blob directly from the
 * caller's byte array by calling obi_blob() with ELEMENT_SIZE_UINT8, and
 * passes value_length for both of obi_blob()'s length arguments (the
 * superseded lines pass the encoded byte length and the sequence length in
 * those two positions). No encoding step is applied to `value`.
 *
 * Returns whatever obi_blob() returns.
 */
Obi_blob_p obi_uint8_to_blob(const char* quality)
Obi_blob_p obi_uint8_to_blob(const uint8_t* value, int value_length)
{
Obi_blob_p value_b;
int32_t length_encoded_seq; // length of the encoded sequence in bytes
int32_t seq_length;
byte_t* encoded_seq;
// NOTE(review): `seq` is not declared by either signature above; these lines
// look like leftovers from a sequence-encoding function -- confirm.
seq_length = strlen(seq);
// Check if just ATGC and encode accordingly
if (only_ATGC(seq))
{
// Compute the length (in bytes) of the encoded sequence
// (2 bits per symbol: 4 symbols per byte, rounded up)
length_encoded_seq = ceil((double) seq_length / (double) 4.0);
// Encode
encoded_seq = encode_seq_on_2_bits(seq, seq_length);
if (encoded_seq == NULL)
return NULL;
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length);
}
else
{
// Compute the length (in bytes) of the encoded sequence
// (4 bits per symbol: 2 symbols per byte, rounded up)
length_encoded_seq = ceil((double) seq_length / (double) 2.0);
// Encode
encoded_seq = encode_seq_on_4_bits(seq, seq_length);
if (encoded_seq == NULL)
return NULL;
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length);
}
// The temporary encoded buffer is released once the blob has been built.
free(encoded_seq);
return value_b;
// Replacement body: wrap the raw bytes without any intermediate buffer.
return obi_blob((byte_t*)value, ELEMENT_SIZE_UINT8, value_length, value_length);
}
/*
 * NOTE(review): two versions of this function appear interleaved (diff
 * markers stripped). Presumably the `char*` signature with the 2-bit / 4-bit
 * decode branches is the superseded text and the `const uint8_t*` signature
 * with the cast-and-return line is the replacement -- confirm.
 *
 * Replacement version, as visible here: returns the blob's `value` field cast
 * to a uint8_t pointer. Nothing is copied or decoded by this function, so the
 * returned pointer refers to the blob's own storage.
 */
char* obi_blob_to_quality_char(Obi_blob_p value_b)
const uint8_t* obi_blob_to_uint8(Obi_blob_p value_b)
{
// Decode
// Superseded path: choose the decoder from the blob's element size and
// decode length_decoded_value symbols.
if (value_b->element_size == 2)
return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
else
return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);
// Replacement path: expose the stored bytes as they are.
return ((uint8_t*) (value_b->value));
}
index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value)
index_t obi_index_uint8(Obi_indexer_p indexer, const uint8_t* value, int value_length)
{
Obi_blob_p value_b;
index_t idx;
// Encode value
value_b = obi_seq_to_blob(value);
value_b = obi_uint8_to_blob(value, value_length);
if (value_b == NULL)
return -1;
@ -89,7 +56,7 @@ index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value)
}
/*
 * NOTE(review): two signatures appear interleaved (diff markers stripped),
 * and a hunk header sits inside the body, so a line may be elided here --
 * confirm against the repository.
 *
 * Version taking `value_length`, as visible here: fetches the blob stored at
 * index `idx` through obi_indexer_get(), writes the blob's
 * length_decoded_value into *value_length, and returns the result of
 * obi_blob_to_uint8() on that blob.
 *
 * NOTE(review): value_b is dereferenced without a visible NULL check; whether
 * obi_indexer_get() can return NULL is not shown here -- confirm.
 */
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
const uint8_t* obi_retrieve_uint8(Obi_indexer_p indexer, index_t idx, int* value_length)
{
Obi_blob_p value_b;
@ -97,6 +64,7 @@ char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
value_b = obi_indexer_get(indexer, idx);
// Return decoded sequence
return obi_blob_to_seq(value_b);
// Report the stored length to the caller through the out-parameter.
*value_length = value_b->length_decoded_value;
return obi_blob_to_uint8(value_b);
}