C: Added a function to get a nucleotide at a specific index in an
encoded sequence
This commit is contained in:
30
src/encode.c
30
src/encode.c
@ -114,6 +114,35 @@ bool is_a_DNA_seq(const char* seq)
|
||||
}
|
||||
|
||||
|
||||
/*
 * Returns the (still encoded) nucleotide found at a given position of a
 * sequence packed with 2 or 4 bits per nucleotide.
 *
 * seq      : the encoded sequence.
 * idx      : position of the nucleotide, counted in nucleotides (not in
 *            bytes); must not be negative.
 * encoding : number of bits used per nucleotide, either 2 or 4.
 *
 * Inside a byte, nucleotides are read from the most significant bits
 * downwards: with 2 bits, position 0 sits in bits 7-6 and position 3 in
 * bits 1-0; with 4 bits, position 0 sits in the high nibble and position 1
 * in the low nibble.
 *
 * On invalid arguments (null sequence, negative index, encoding other than
 * 2 or 4), calls obi_set_errno(OBI_DECODE_ERROR) and returns -1 converted
 * to byte_t.
 *
 * NOTE: the length of the sequence is not passed to this function, so the
 * upper bound of idx cannot be checked here; the caller must guarantee that
 * idx is smaller than the number of encoded nucleotides.
 */
byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encoding)
{
	uint8_t shift;
	uint8_t mask;
	byte_t  nuc;

	// A negative index would give a negative remainder (hence a shift wider
	// than a byte) and a read before the beginning of the buffer.
	if (!seq || (idx < 0))
	{
		obi_set_errno(OBI_DECODE_ERROR);
		obidebug(1, "\nInvalid arguments: the encoded sequence must not be NULL and the index must not be negative");
		return -1;
	}

	if (encoding == 2)
	{
		// 4 nucleotides per byte, the first one in the 2 highest bits
		shift = 6 - 2*(idx % 4);
		mask = NUC_MASK_2B << shift;
		nuc = (seq[idx/4] & mask) >> shift;
	}
	else if (encoding == 4)
	{
		// 2 nucleotides per byte, the first one in the high nibble
		shift = 4 - 4*(idx % 2);
		mask = NUC_MASK_4B << shift;
		nuc = (seq[idx/2] & mask) >> shift;
	}
	else
	{
		obi_set_errno(OBI_DECODE_ERROR);
		obidebug(1, "\nInvalid encoding base: must be on 2 bits or 4 bits");
		return -1;
	}

	return nuc;
}
|
||||
|
||||
|
||||
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
|
||||
{
|
||||
byte_t* seq_b;
|
||||
@ -405,6 +434,7 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
|
||||
|
||||
///////////////////// FOR DEBUGGING ///////////////////////////
|
||||
// NOTE: The first byte is printed first (leftmost).
|
||||
// TODO Move to utils
|
||||
|
||||
void print_bits(void* ptr, int32_t size)
|
||||
{
|
||||
|
18
src/encode.h
18
src/encode.h
@ -108,6 +108,24 @@ bool only_IUPAC_DNA(const char* seq);
|
||||
bool is_a_DNA_seq(const char* seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Returns a nucleotide from a DNA sequence encoded
|
||||
* with each nucleotide on 2 or 4 bits.
|
||||
*
|
||||
* @param seq The encoded sequence.
|
||||
* @param idx The index (in the decoded sequence) of the nucleotide to get.
|
||||
* @param encoding The number of bits used to encode each nucleotide: must be 2 or 4.
|
||||
*
|
||||
* @returns The (still encoded) nucleotide at the given index.
|
||||
* @retval 255 if an error occurred.
|
||||
*
|
||||
* @see decode_seq_on_2_bits() and decode_seq_on_4_bits()
|
||||
* @since January 2019
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encoding);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
|
||||
*
|
||||
|
Reference in New Issue
Block a user