Now handling sequences with Uracil (U) nucleotides by converting to
Thymine (T)
This commit is contained in:
@ -36,10 +36,12 @@ bool only_ATGC(const char* seq)
|
||||
{
|
||||
if (!((*c == 'A') || \
|
||||
(*c == 'T') || \
|
||||
(*c == 'U') || \
|
||||
(*c == 'G') || \
|
||||
(*c == 'C') || \
|
||||
(*c == 'a') || \
|
||||
(*c == 't') || \
|
||||
(*c == 'u') || \
|
||||
(*c == 'g') || \
|
||||
(*c == 'c')))
|
||||
{
|
||||
@ -182,6 +184,8 @@ byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
|
||||
break;
|
||||
case 't':
|
||||
case 'T':
|
||||
case 'u':
|
||||
case 'U':
|
||||
seq_b[i/4] |= NUC_T_2b;
|
||||
break;
|
||||
default:
|
||||
@ -288,6 +292,8 @@ byte_t* encode_seq_on_4_bits(const char* seq, int32_t length)
|
||||
break;
|
||||
case 't':
|
||||
case 'T':
|
||||
case 'u': // discussable
|
||||
case 'U':
|
||||
seq_b[i/2] |= NUC_T_4b;
|
||||
break;
|
||||
case 'r':
|
||||
|
44
src/encode.h
44
src/encode.h
@ -64,7 +64,7 @@ enum
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if there are only 'atgcATGC' characters in a
|
||||
* @brief Checks if there are only 'atgcuATGCU' characters in a
|
||||
* character string.
|
||||
*
|
||||
* @param seq The sequence to check.
|
||||
@ -129,12 +129,13 @@ byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encodin
|
||||
/**
|
||||
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
|
||||
*
|
||||
* A or a : 00
|
||||
* C or c : 01
|
||||
* T or t : 10
|
||||
* G or g : 11
|
||||
* A or a : 00
|
||||
* C or c : 01
|
||||
* T or t or U or u : 10
|
||||
* G or g : 11
|
||||
*
|
||||
* @warning The DNA sequence must contain only 'atgcATGC' characters.
|
||||
* @warning The DNA sequence must contain only 'atgcuATGCU' characters.
|
||||
* @warning Uracil ('U') bases are encoded as Thymine ('T') bases.
|
||||
*
|
||||
* @param seq The sequence to encode.
|
||||
* @param length The length of the sequence to encode.
|
||||
@ -169,23 +170,24 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
|
||||
/**
|
||||
* @brief Encodes a DNA sequence with each nucleotide coded on 4 bits.
|
||||
*
|
||||
* A or a : 0001
|
||||
* C or c : 0010
|
||||
* G or g : 0011
|
||||
* T or t : 0100
|
||||
* R or r : 0101
|
||||
* Y or y : 0110
|
||||
* S or s : 0111
|
||||
* W or w : 1000
|
||||
* K or k : 1001
|
||||
* M or m : 1010
|
||||
* B or b : 1011
|
||||
* D or d : 1100
|
||||
* H or h : 1101
|
||||
* V or v : 1110
|
||||
* N or n : 1111
|
||||
* A or a : 0001
|
||||
* C or c : 0010
|
||||
* G or g : 0011
|
||||
* T or t or U or u : 0100
|
||||
* R or r : 0101
|
||||
* Y or y : 0110
|
||||
* S or s : 0111
|
||||
* W or w : 1000
|
||||
* K or k : 1001
|
||||
* M or m : 1010
|
||||
* B or b : 1011
|
||||
* D or d : 1100
|
||||
* H or h : 1101
|
||||
* V or v : 1110
|
||||
* N or n : 1111
|
||||
*
|
||||
* @warning The DNA sequence must contain only IUPAC characters.
|
||||
* @warning Uracil ('U') bases are encoded as Thymine ('T') bases.
|
||||
*
|
||||
* @param seq The sequence to encode.
|
||||
* @param length The length of the sequence to encode.
|
||||
|
Reference in New Issue
Block a user