diff --git a/src/encode.c b/src/encode.c
index b1be7cd..085db80 100755
--- a/src/encode.c
+++ b/src/encode.c
@@ -114,6 +114,46 @@ bool is_a_DNA_seq(const char* seq)
 }
 
 
+byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encoding)
+{
+	uint8_t shift;
+	uint8_t mask;
+	byte_t  nuc;
+
+	// A negative index would make the remainders below negative, producing a shift
+	// wider than a byte, and could index before the start of seq.
+	if (!seq || (idx < 0))
+	{
+		obi_set_errno(OBI_DECODE_ERROR);
+		obidebug(1, "\nInvalid arguments: NULL encoded sequence or negative nucleotide index");
+		return -1;
+	}
+
+	if (encoding == 2)
+	{
+		// 4 nucleotides per byte; the first one of each byte is shifted by 6 (high-order bits)
+		shift = 6 - 2*(idx % 4);
+		mask = NUC_MASK_2B << shift;
+		nuc = (seq[idx/4] & mask) >> shift;
+	}
+	else if (encoding == 4)
+	{
+		// 2 nucleotides per byte; the first one of each byte is shifted by 4 (high-order bits)
+		shift = 4 - 4*(idx % 2);
+		mask = NUC_MASK_4B << shift;
+		nuc = (seq[idx/2] & mask) >> shift;
+	}
+	else
+	{
+		obi_set_errno(OBI_DECODE_ERROR);
+		obidebug(1, "\nInvalid encoding base: must be on 2 bits or 4 bits");
+		return -1;
+	}
+
+	return nuc;
+}
+
+
 byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
 {
 	byte_t* seq_b;
@@ -405,6 +445,7 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
 ///////////////////// FOR DEBUGGING ///////////////////////////
 
 //NOTE: The first byte is printed the first (at the left-most).
+// TODO Move to utils
 void print_bits(void* ptr, int32_t size)
 {
 
diff --git a/src/encode.h b/src/encode.h
index 6acd906..db8a214 100755
--- a/src/encode.h
+++ b/src/encode.h
@@ -108,6 +108,27 @@ bool only_IUPAC_DNA(const char* seq);
 bool is_a_DNA_seq(const char* seq);
 
 
+/**
+ * @brief Returns a nucleotide from a DNA sequence encoded
+ *        with each nucleotide on 2 or 4 bits.
+ *
+ * @warning idx is not checked against the length of the sequence:
+ *          the caller must make sure that it is a valid index.
+ *
+ * @param seq The encoded sequence.
+ * @param idx The index (in the decoded sequence) of the nucleotide to get.
+ * @param encoding An integer indicating whether the sequence is encoded with each nucleotide on 2 or 4 bits.
+ *
+ * @returns The (still encoded) nucleotide at the given index.
+ * @retval 255 if an error occurred (NULL sequence, negative index, or encoding other than 2 or 4).
+ *
+ * @see decode_seq_on_2_bits() and decode_seq_on_4_bits()
+ * @since January 2019
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+byte_t get_nucleotide_from_encoded_seq(byte_t* seq, int32_t idx, uint8_t encoding);
+
+
 /**
  * @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
  *