Files
obitools3/src/dna_seq_indexer.c
2018-11-19 11:23:54 +01:00

109 lines
2.5 KiB
C
Executable File

/****************************************************************************
* DNA sequence indexing functions *
****************************************************************************/
/**
* @file dna_seq_indexer.c
* @author Celine Mercier
* @date April 12th 2016
* @brief Functions handling the indexing and retrieval of DNA sequences.
*/
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dna_seq_indexer.h"
#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidebug.h"
#include "obitypes.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**
 * @brief Encodes a DNA sequence and wraps the result in a blob.
 *
 * Sequences for which only_ATGC() is true are encoded with
 * encode_seq_on_2_bits() (4 nucleotides per byte); all other sequences are
 * encoded with encode_seq_on_4_bits() (2 nucleotides per byte).
 *
 * @param seq The NUL-terminated sequence to encode.
 *
 * @return The blob built by obi_blob() from the encoded sequence. The
 *         functions in this file release it with free() once done.
 * @retval NULL if seq is NULL, if the sequence is too long for its length to
 *         be stored in an int32_t, if the encoding failed, or if obi_blob()
 *         itself returned NULL.
 */
Obi_blob_p obi_seq_to_blob(const char* seq)
{
    Obi_blob_p value_b;
    int32_t    length_encoded_seq;   // length of the encoded sequence in bytes
    int32_t    seq_length;
    size_t     raw_length;
    byte_t*    encoded_seq;

    // strlen() on a NULL pointer is undefined behaviour
    if (seq == NULL)
        return NULL;

    // The lengths handed to the encoders and to obi_blob() are int32_t:
    // refuse sequences whose length would not survive the conversion
    raw_length = strlen(seq);
    if (raw_length > (size_t) INT32_MAX)
        return NULL;
    seq_length = (int32_t) raw_length;

    // Check if just ATGC and encode accordingly
    if (only_ATGC(seq))
    {
        // Length (in bytes) of the encoded sequence: ceil(seq_length / 4),
        // computed with integers so that no addition can overflow
        length_encoded_seq = seq_length / 4 + (seq_length % 4 != 0);

        // Encode
        encoded_seq = encode_seq_on_2_bits(seq, seq_length);
        if (encoded_seq == NULL)
            return NULL;

        value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length);
    }
    else
    {
        // Length (in bytes) of the encoded sequence: ceil(seq_length / 2)
        length_encoded_seq = seq_length / 2 + (seq_length % 2 != 0);

        // Encode
        encoded_seq = encode_seq_on_4_bits(seq, seq_length);
        if (encoded_seq == NULL)
            return NULL;

        value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length);
    }

    // The temporary encoding buffer is released whether or not obi_blob()
    // succeeded (presumably obi_blob() copies the bytes -- TODO confirm)
    free(encoded_seq);

    return value_b;
}
/**
 * @brief Decodes the DNA sequence stored in a blob.
 *
 * The decoder is selected from the element size recorded in the blob:
 * decode_seq_on_2_bits() when it is 2, decode_seq_on_4_bits() otherwise.
 *
 * @param value_b The blob holding the encoded sequence. It is dereferenced
 *                unconditionally and must not be NULL.
 *
 * @return Whatever the selected decoder returns for the blob's value and
 *         decoded length.
 */
char* obi_blob_to_seq(Obi_blob_p value_b)
{
    const int is_two_bit_encoding = (value_b->element_size == 2);

    // Any element size other than 2 is treated as the 4-bit encoding:
    // it is deliberately not verified, for efficiency reasons
    if (!is_two_bit_encoding)
        return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);

    return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
}
/**
 * @brief Encodes a DNA sequence and adds it to an indexer.
 *
 * @param indexer The indexer the encoded sequence is added to.
 * @param value   The sequence to index.
 *
 * @return The value returned by obi_indexer_add() for the encoded sequence.
 * @retval -1 if the sequence could not be encoded.
 */
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value)
{
    index_t    position = -1;
    Obi_blob_p encoded  = obi_seq_to_blob(value);

    if (encoded != NULL)
    {
        // Hand the encoded sequence to the indexer, then drop our own copy
        position = obi_indexer_add(indexer, encoded);
        free(encoded);
    }

    return position;
}
/**
 * @brief Retrieves a DNA sequence from an indexer and decodes it.
 *
 * @param indexer The indexer to read from.
 * @param idx     The index of the sequence in the indexer.
 *
 * @return The decoded sequence, as returned by obi_blob_to_seq().
 * @retval NULL if obi_indexer_get() did not return a blob for idx.
 */
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
{
    Obi_blob_p value_b;

    // Get encoded value
    value_b = obi_indexer_get(indexer, idx);

    // obi_blob_to_seq() dereferences its argument unconditionally, so a
    // failed lookup must be reported here rather than passed along
    if (value_b == NULL)
        return NULL;

    // Return decoded sequence
    return obi_blob_to_seq(value_b);
}