/**************************************************************************** * Encoding functions * ****************************************************************************/ /** * @file encode.c * @author Celine Mercier * @date November 18th 2015 * @brief Functions encoding DNA sequences. */ #include #include #include #include #include "encode.h" #include "obiarray.h" #include "obidebug.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) // TODO: endianness problem? bool only_ATGC(char* seq) { char* c = seq; while (*c) { if (!((*c == 'A') || \ (*c == 'T') || \ (*c == 'G') || \ (*c == 'C') || \ (*c == 'a') || \ (*c == 't') || \ (*c == 'g') || \ (*c == 'c'))) { return 0; } else { c++; } } return 1; } byte_t* encode_seq_on_2_bits(char* seq, int32_t length) // TODO shift = 2 { byte_t* seq_b; uint8_t shift; int32_t length_b; int32_t i; // fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>Encoding sequence %s", seq); length_b = ceil((double) length / (double) 4.0); // fprintf(stderr, "\nLength: %d", length_b); seq_b = (byte_t*) malloc(length_b * sizeof(byte_t)); memset(seq_b, 0, length_b); for (i=0; i>>>>>>>>Encoded:"); // print_bits(seq_b, length_b); return seq_b; } char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq) { char* seq; int32_t i; uint8_t shift; uint8_t mask; uint8_t nuc; seq = (char*) malloc((length_seq+1) * sizeof(char)); for (i=0; i> shift; switch (nuc) { case NUC_A: seq[i] = 'a'; break; case NUC_C: seq[i] = 'c'; break; case NUC_G: seq[i] = 'g'; break; case NUC_T: seq[i] = 't'; break; default: obidebug(1, "\nInvalid nucleotide base when decoding"); return NULL; } } seq[length_seq] = '\0'; return seq; } ////////// FOR DEBUGGING /////////// // little endian void print_bits(void* ptr, int32_t size) { uint8_t* b = (uint8_t*) ptr; uint8_t byte; int32_t i, j; fprintf(stderr, "\n"); for (i=0;i=0;j--) { byte = b[i] & (1<>= j; fprintf(stderr, "%u", byte); } fprintf(stderr, " "); } fprintf(stderr, "\n"); }