Handle sequences containing uracil (U) nucleotides by encoding them as
thymine (T).
This commit is contained in:
@ -36,10 +36,12 @@ bool only_ATGC(const char* seq)
|
|||||||
{
|
{
|
||||||
if (!((*c == 'A') || \
|
if (!((*c == 'A') || \
|
||||||
(*c == 'T') || \
|
(*c == 'T') || \
|
||||||
|
(*c == 'U') || \
|
||||||
(*c == 'G') || \
|
(*c == 'G') || \
|
||||||
(*c == 'C') || \
|
(*c == 'C') || \
|
||||||
(*c == 'a') || \
|
(*c == 'a') || \
|
||||||
(*c == 't') || \
|
(*c == 't') || \
|
||||||
|
(*c == 'u') || \
|
||||||
(*c == 'g') || \
|
(*c == 'g') || \
|
||||||
(*c == 'c')))
|
(*c == 'c')))
|
||||||
{
|
{
|
||||||
@ -182,6 +184,8 @@ byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
|
|||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
case 'T':
|
case 'T':
|
||||||
|
case 'u':
|
||||||
|
case 'U':
|
||||||
seq_b[i/4] |= NUC_T_2b;
|
seq_b[i/4] |= NUC_T_2b;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -288,6 +292,8 @@ byte_t* encode_seq_on_4_bits(const char* seq, int32_t length)
|
|||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
case 'T':
|
case 'T':
|
||||||
|
case 'u': // discussable
|
||||||
|
case 'U':
|
||||||
seq_b[i/2] |= NUC_T_4b;
|
seq_b[i/2] |= NUC_T_4b;
|
||||||
break;
|
break;
|
||||||
case 'r':
|
case 'r':
|
||||||
|
44
src/encode.h
44
src/encode.h
@ -64,7 +64,7 @@ enum
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Checks if there are only 'atgcATGC' characters in a
|
* @brief Checks if there are only 'atgcuATGCU' characters in a
|
||||||
* character string.
|
* character string.
|
||||||
*
|
*
|
||||||
* @param seq The sequence to check.
|
* @param seq The sequence to check.
|
||||||
@ -129,12 +129,13 @@ byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encodin
|
|||||||
/**
|
/**
|
||||||
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
|
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
|
||||||
*
|
*
|
||||||
* A or a : 00
|
* A or a : 00
|
||||||
* C or c : 01
|
* C or c : 01
|
||||||
* T or t : 10
|
* T or t or U or u : 10
|
||||||
* G or g : 11
|
* G or g : 11
|
||||||
*
|
*
|
||||||
* @warning The DNA sequence must contain only 'atgcATGC' characters.
|
* @warning The DNA sequence must contain only 'atgcuATGCU' characters.
|
||||||
|
* @warning Uracil ('U' or 'u') bases are encoded with the same code as Thymine ('T'), so the two cannot be told apart once encoded.
|
||||||
*
|
*
|
||||||
* @param seq The sequence to encode.
|
* @param seq The sequence to encode.
|
||||||
* @param length The length of the sequence to encode.
|
* @param length The length of the sequence to encode.
|
||||||
@ -169,23 +170,24 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
|
|||||||
/**
|
/**
|
||||||
* @brief Encodes a DNA sequence with each nucleotide coded on 4 bits.
|
* @brief Encodes a DNA sequence with each nucleotide coded on 4 bits.
|
||||||
*
|
*
|
||||||
* A or a : 0001
|
* A or a : 0001
|
||||||
* C or c : 0010
|
* C or c : 0010
|
||||||
* G or g : 0011
|
* G or g : 0011
|
||||||
* T or t : 0100
|
* T or t or U or u : 0100
|
||||||
* R or r : 0101
|
* R or r : 0101
|
||||||
* Y or y : 0110
|
* Y or y : 0110
|
||||||
* S or s : 0111
|
* S or s : 0111
|
||||||
* W or w : 1000
|
* W or w : 1000
|
||||||
* K or k : 1001
|
* K or k : 1001
|
||||||
* M or m : 1010
|
* M or m : 1010
|
||||||
* B or b : 1011
|
* B or b : 1011
|
||||||
* D or d : 1100
|
* D or d : 1100
|
||||||
* H or h : 1101
|
* H or h : 1101
|
||||||
* V or v : 1110
|
* V or v : 1110
|
||||||
* N or n : 1111
|
* N or n : 1111
|
||||||
*
|
*
|
||||||
* @warning The DNA sequence must contain only IUPAC characters.
|
* @warning The DNA sequence must contain only IUPAC characters.
|
||||||
|
* @warning Uracil ('U' or 'u') bases are encoded with the same code as Thymine ('T'), so the two cannot be told apart once encoded.
|
||||||
*
|
*
|
||||||
* @param seq The sequence to encode.
|
* @param seq The sequence to encode.
|
||||||
* @param length The length of the sequence to encode.
|
* @param length The length of the sequence to encode.
|
||||||
|
Reference in New Issue
Block a user