/**************************************************************************** * Encoding functions * ****************************************************************************/ /** * @file encode.c * @author Celine Mercier * @date November 18th 2015 * @brief Functions encoding DNA sequences. */ #include #include #include #include #include "encode.h" #include "obiarray.h" #include "obidebug.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) // TODO: endianness problem? bool only_ATGC(char* seq) { char* c = seq; while (*c) { if (!((*c == 'A') || \ (*c == 'T') || \ (*c == 'G') || \ (*c == 'C') || \ (*c == 'a') || \ (*c == 't') || \ (*c == 'g') || \ (*c == 'c'))) { return 0; } else { c++; } } return 1; } byte_t* encode_seq_on_2_bits(char* seq, int32_t length) { byte_t* seq_b; uint8_t modulo; int32_t length_b; int32_t i; length_b = ceil((double) length / (double) 4.0); seq_b = (byte_t*) malloc(length_b * sizeof(byte_t)); // Initialize all the bits to 0 memset(seq_b, 0, length_b); for (i=0; i> shift; switch (nuc) { case NUC_A_2b: seq[i] = 'a'; break; case NUC_C_2b: seq[i] = 'c'; break; case NUC_G_2b: seq[i] = 'g'; break; case NUC_T_2b: seq[i] = 't'; break; default: obidebug(1, "\nInvalid nucleotide base when decoding"); return NULL; } } seq[length_seq] = '\0'; return seq; } byte_t* encode_seq_on_4_bits(char* seq, int32_t length) { byte_t* seq_b; uint8_t modulo; int32_t length_b; int32_t i; length_b = ceil((double) length / (double) 2.0); seq_b = (byte_t*) malloc(length_b * sizeof(byte_t)); // Initialize all the bits to 0 memset(seq_b, 0, length_b); for (i=0; i> shift; switch (nuc) { case NUC_A_4b: seq[i] = 'a'; break; case NUC_C_4b: seq[i] = 'c'; break; case NUC_G_4b: seq[i] = 'g'; break; case NUC_T_4b: seq[i] = 't'; break; case NUC_R_4b: seq[i] = 'r'; break; case NUC_Y_4b: seq[i] = 'y'; break; case NUC_S_4b: seq[i] = 's'; break; case NUC_W_4b: seq[i] = 'w'; break; case NUC_K_4b: seq[i] = 'k'; break; case NUC_M_4b: seq[i] = 'm'; break; case NUC_B_4b: seq[i] = 'b'; break; case NUC_D_4b: seq[i] = 'd'; break; case NUC_H_4b: seq[i] = 'h'; break; case NUC_V_4b: seq[i] = 'v'; break; case NUC_N_4b: seq[i] = 'n'; break; default: obidebug(1, "\nInvalid nucleotide base when decoding"); return NULL; } } seq[length_seq] = '\0'; return seq; } ////////// FOR DEBUGGING /////////// // little endian void print_bits(void* ptr, int32_t size) { uint8_t* b = (uint8_t*) ptr; uint8_t byte; int32_t i, j; fprintf(stderr, "\n"); for (i=0;i=0;j--) { byte = b[i] & (1<>= j; fprintf(stderr, "%u", byte); } fprintf(stderr, " "); } fprintf(stderr, "\n"); }