C: Added a function to get a nucleotide at a specific index in an encoded sequence
This commit is contained in:
Celine Mercier
2019-01-21 17:18:02 +01:00
parent 2a6a112d29
commit 9f6bba183f
2 changed files with 48 additions and 0 deletions

View File

@ -114,6 +114,35 @@ bool is_a_DNA_seq(const char* seq)
}
/*
 * Returns the (still encoded) nucleotide stored at position idx of a DNA
 * sequence packed with 2 or 4 bits per nucleotide.
 *
 * Within each byte, nucleotides are stored from the high-order bits down:
 * the first nucleotide of a byte is read with the largest shift (6 for the
 * 2-bit encoding, 4 for the 4-bit encoding) and the last one with a shift of 0.
 *
 * seq      : the encoded sequence.
 * idx      : 0-based index of the nucleotide in the decoded sequence. It must
 *            be non-negative; the upper bound can not be checked here because
 *            the sequence length is not passed to this function.
 * encoding : number of bits per nucleotide, 2 or 4.
 *
 * On error (negative index or unsupported encoding), the obi errno is set to
 * OBI_DECODE_ERROR and -1 converted to byte_t is returned.
 */
byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encoding)
{
    uint8_t shift;
    uint8_t mask;
    byte_t  nuc;

    // In C the remainder takes the sign of the dividend, so a negative index
    // would yield a negative idx % n: the shift would then exceed the byte
    // width and the byte read would not belong to the sequence.
    if (idx < 0)
    {
        obi_set_errno(OBI_DECODE_ERROR);
        obidebug(1, "\nInvalid index: the index of a nucleotide in an encoded sequence can not be negative");
        return (byte_t) -1;
    }

    if (encoding == 2)
    {
        // 4 nucleotides per byte: shifts are 6, 4, 2, 0 for positions 0..3
        shift = 6 - 2*(idx % 4);
        mask = NUC_MASK_2B << shift;
        nuc = (seq[idx/4] & mask) >> shift;
    }
    else if (encoding == 4)
    {
        // 2 nucleotides per byte: shifts are 4, 0 for positions 0..1
        shift = 4 - 4*(idx % 2);
        mask = NUC_MASK_4B << shift;
        nuc = (seq[idx/2] & mask) >> shift;
    }
    else
    {
        obi_set_errno(OBI_DECODE_ERROR);
        obidebug(1, "\nInvalid encoding base: must be on 2 bits or 4 bits");
        return (byte_t) -1;
    }

    return nuc;
}
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
{
byte_t* seq_b;
@ -405,6 +434,7 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
///////////////////// FOR DEBUGGING ///////////////////////////
//NOTE: The first byte is printed first (leftmost).
// TODO Move to utils
void print_bits(void* ptr, int32_t size)
{

View File

@ -108,6 +108,24 @@ bool only_IUPAC_DNA(const char* seq);
bool is_a_DNA_seq(const char* seq);
/**
 * @brief Returns a single nucleotide from a DNA sequence encoded
 *        with each nucleotide on 2 or 4 bits.
 *
 * Nucleotides are packed from the high-order bits of each byte down:
 * with the 2-bit encoding a byte holds 4 nucleotides, with the 4-bit
 * encoding it holds 2, and in both cases the first nucleotide of a byte
 * is the one stored in its highest-order bits.
 *
 * @param seq The encoded sequence.
 * @param idx The 0-based index (in the decoded sequence) of the nucleotide to get.
 *            The caller must make sure that it lies within the sequence, as the
 *            sequence length is not known to this function.
 * @param encoding An integer indicating whether the sequence is encoded with each
 *                 nucleotide on 2 or 4 bits. Any value other than 2 or 4 is an error.
 *
 * @returns The (still encoded) nucleotide at the given index, in the low-order bits
 *          of the returned byte.
 * @retval 255 if an error occurred (the obi errno is then set to OBI_DECODE_ERROR).
 *
 * @see decode_seq_on_2_bits() and decode_seq_on_4_bits()
 * @since January 2019
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encoding);
/**
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
*