96 lines
2.2 KiB
C
96 lines
2.2 KiB
C
![]() |
/****************************************************************************
|
||
|
* Encoding header file *
|
||
|
****************************************************************************/
|
||
|
|
||
|
/**
|
||
|
* @file encode.h
|
||
|
* @author Celine Mercier
|
||
|
* @date November 18th 2015
|
||
|
* @brief Header file for encoding DNA sequences.
|
||
|
*/
|
||
|
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdint.h>
|
||
|
#include <stdbool.h>
|
||
|
|
||
|
#include "obiarray.h"
|
||
|
|
||
|
|
||
|
#define NUC_MASK 0x3 /**< Binary 11: mask with the two low-order bits set (the width of one nucleotide code); to use when decoding */
|
||
|
|
||
|
|
||
|
/**
 * @brief 2-bit codes for each of the 4 nucleotides.
 *
 * Every code fits in two bits, i.e. within the value of NUC_MASK (0x3).
 */
enum
{
    NUC_A = 0x0,  /**< binary: 00 */
    NUC_C = 0x1,  /**< binary: 01 */
    NUC_G = 0x2,  /**< binary: 10 */
    NUC_T = 0x3   /**< binary: 11 */
};
|
||
|
|
||
|
|
||
|
/**
|
||
|
* @brief Checks if there are only 'atgcATGC' characters in a
|
||
|
* character string.
|
||
|
*
|
||
|
* @param seq The sequence to check.
|
||
|
*
|
||
|
* @returns A boolean value indicating if there are only
|
||
|
* 'atgcATGC' characters in a character string.
|
||
|
*
|
||
|
* @since November 2015
|
||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||
|
*/
|
||
|
/* NOTE(review): the function is documented as a check, so seq is presumably a
 * NUL-terminated string that is only read; the parameter is not
 * const-qualified — confirm against the definition before changing it. */
bool only_ATGC(char* seq);
|
||
|
|
||
|
|
||
|
/**
 * @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
 *
 * A or a : 00
 * C or c : 01
 * G or g : 10
 * T or t : 11
 *
 * @warning The DNA sequence must contain only 'atgcATGC' characters.
 *
 * @param seq The sequence to encode.
 * @param length The length of the sequence to encode.
 *
 * @returns The encoded sequence.
 *
 * @since November 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
|
||
|
/* NOTE(review): byte_t is not declared in this header; presumably it comes
 * from "obiarray.h" — TODO confirm. This header does not state who owns (and
 * must release) the returned buffer; check the implementation. */
byte_t* encode_seq_on_2_bits(char* seq, int32_t length);
|
||
|
|
||
|
|
||
|
/**
 * @brief Decodes a DNA sequence that is coded with each nucleotide on 2 bits.
 *
 * A or a : 00
 * C or c : 01
 * G or g : 10
 * T or t : 11
 *
 * @param seq_b The sequence to decode.
 * @param length_seq The initial length of the sequence before it was encoded.
 *
 * @returns The decoded sequence ended with '\0'.
 *
 * @since November 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
|
||
|
/* NOTE(review): this header does not state who owns (and must release) the
 * returned string — confirm against the implementation. */
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
|
||
|
|
||
|
|
||
|
////////// FOR DEBUGGING ///////////
|
||
|
|
||
|
// little endian
|
||
|
/* Debugging helper. NOTE(review): presumably prints the bits of the memory
 * at ptr; whether length counts bytes or bits is not stated here — confirm
 * against the definition. */
void print_bits(void* ptr, int32_t length);
|
||
|
|