/**************************************************************************** * Encoding header file * ****************************************************************************/ /** * @file encode.h * @author Celine Mercier * @date November 18th 2015 * @brief Header file for encoding DNA sequences. */ #include #include #include #include #include "obiarray.h" #define NUC_MASK 0x3 /**< Binary: 11 to use when decoding */ /** * @brief enum for the 2-bits codes for each of the 4 nucleotides. */ enum { NUC_A = 0x0, /* binary: 00 */ NUC_C = 0x1, /* binary: 01 */ NUC_G = 0x2, /* binary: 10 */ NUC_T = 0x3, /* binary: 11 */ }; /** * @brief Checks if there are only 'atgcATGC' characters in a * character string. * * @param seq The sequence to check. * * @returns A boolean value indicating if there are only * 'atgcATGC' characters in a character string. * * @since November 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ bool only_ATGC(char* seq); /** * @brief Encodes a DNA sequence with each nucleotide coded on 2 bits. * * A or a : 00 * C or c : 01 * T or t : 10 * G or g : 11 * * @warning The DNA sequence must contain only 'atgcATGC' characters. * * @param seq The sequence to encode. * @param length The length of the sequence to encode. * * @returns The encoded sequence. * * @since November 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ byte_t* encode_seq_on_2_bits(char* seq, int32_t length); /** * @brief Decodes a DNA sequence that is coded with each nucleotide on 2 bits. * * A or a : 00 * C or c : 01 * T or t : 10 * G or g : 11 * * @param seq The sequence to decode. * @param length_seq The initial length of the sequence before it was encoded. * * @returns The decoded sequence ended with '\0'. * * @since November 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq); ////////// FOR DEBUGGING /////////// // little endian void print_bits(void* ptr, int32_t length);