Handle sequences containing Uracil (U) nucleotides by encoding them as
Thymine (T).
This commit is contained in:
@ -36,10 +36,12 @@ bool only_ATGC(const char* seq)
|
||||
{
|
||||
if (!((*c == 'A') || \
|
||||
(*c == 'T') || \
|
||||
(*c == 'U') || \
|
||||
(*c == 'G') || \
|
||||
(*c == 'C') || \
|
||||
(*c == 'a') || \
|
||||
(*c == 't') || \
|
||||
(*c == 'u') || \
|
||||
(*c == 'g') || \
|
||||
(*c == 'c')))
|
||||
{
|
||||
@ -182,6 +184,8 @@ byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
|
||||
break;
|
||||
case 't':
|
||||
case 'T':
|
||||
case 'u':
|
||||
case 'U':
|
||||
seq_b[i/4] |= NUC_T_2b;
|
||||
break;
|
||||
default:
|
||||
@ -288,6 +292,8 @@ byte_t* encode_seq_on_4_bits(const char* seq, int32_t length)
|
||||
break;
|
||||
case 't':
|
||||
case 'T':
|
||||
case 'u': // U is encoded as T here; this choice is open to discussion
|
||||
case 'U':
|
||||
seq_b[i/2] |= NUC_T_4b;
|
||||
break;
|
||||
case 'r':
|
||||
|
10
src/encode.h
10
src/encode.h
@ -64,7 +64,7 @@ enum
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if there are only 'atgcATGC' characters in a
|
||||
* @brief Checks if there are only 'atgcuATGCU' characters in a
|
||||
* character string.
|
||||
*
|
||||
* @param seq The sequence to check.
|
||||
@ -131,10 +131,11 @@ byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encodin
|
||||
*
|
||||
* A or a : 00
|
||||
* C or c : 01
|
||||
* T or t : 10
|
||||
* T or t or U or u : 10
|
||||
* G or g : 11
|
||||
*
|
||||
* @warning The DNA sequence must contain only 'atgcATGC' characters.
|
||||
* @warning The DNA sequence must contain only 'atgcuATGCU' characters.
|
||||
* @warning Uracil ('U') bases are encoded as Thymine ('T') bases.
|
||||
*
|
||||
* @param seq The sequence to encode.
|
||||
* @param length The length of the sequence to encode.
|
||||
@ -172,7 +173,7 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
|
||||
* A or a : 0001
|
||||
* C or c : 0010
|
||||
* G or g : 0011
|
||||
* T or t : 0100
|
||||
* T or t or U or u : 0100
|
||||
* R or r : 0101
|
||||
* Y or y : 0110
|
||||
* S or s : 0111
|
||||
@ -186,6 +187,7 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
|
||||
* N or n : 1111
|
||||
*
|
||||
* @warning The DNA sequence must contain only IUPAC characters.
|
||||
* @warning Uracil ('U') bases are encoded as Thymine ('T') bases.
|
||||
*
|
||||
* @param seq The sequence to encode.
|
||||
* @param length The length of the sequence to encode.
|
||||
|
Reference in New Issue
Block a user