C functions to detect IUPAC sequences

This commit is contained in:
Celine Mercier
2017-07-05 17:26:03 +02:00
parent 101f764cce
commit d10192ab0e
3 changed files with 92 additions and 0 deletions

View File

@ -12,6 +12,8 @@ cdef extern from *:
cdef extern from "encode.h" nogil:
bint only_ATGC(const_char_p seq)
bint only_IUPAC_DNA(const_char_p seq)
bint is_a_DNA_seq(const_char_p seq)
cdef extern from "obitypes.h" nogil:

View File

@ -54,6 +54,66 @@ bool only_ATGC(const char* seq)
}
/*
 * Scans seq up to its terminating NUL and reports whether every character
 * belongs to the accepted alphabet: A, T, G, C, U and the IUPAC ambiguity
 * codes R, Y, S, W, K, M, B, D, H, V, N (upper or lower case), plus the
 * '.' and '-' symbols. Returns 1 if so (including for an empty string),
 * 0 as soon as any other character is met.
 */
bool only_IUPAC_DNA(const char* seq)
{
    const char* cursor;

    for (cursor = seq; *cursor != '\0'; cursor++)
    {
        switch (*cursor)
        {
            /* Plain nucleotides (U is accepted as well). */
            case 'A': case 'a':
            case 'T': case 't':
            case 'G': case 'g':
            case 'C': case 'c':
            case 'U': case 'u':
            /* Two-base ambiguity codes. */
            case 'R': case 'r':
            case 'Y': case 'y':
            case 'S': case 's':
            case 'W': case 'w':
            case 'K': case 'k':
            case 'M': case 'm':
            /* Three-base ambiguity codes. */
            case 'B': case 'b':
            case 'D': case 'd':
            case 'H': case 'h':
            case 'V': case 'v':
            /* Any base. */
            case 'N': case 'n':
            /* Gap symbols. */
            case '.':
            case '-':
                break;

            default:
                /* First character outside the alphabet: reject. */
                return 0;
        }
    }

    return 1;
}
/*
 * Reports whether seq can be read as a DNA sequence: it is accepted when
 * only_ATGC() accepts it, and otherwise when only_IUPAC_DNA() accepts it.
 */
bool is_a_DNA_seq(const char* seq)
{
    /* Short-circuit: the IUPAC check only runs when the ATGC check fails. */
    return only_ATGC(seq) || only_IUPAC_DNA(seq);
}
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
{
byte_t* seq_b;

View File

@ -78,6 +78,36 @@ enum
bool only_ATGC(const char* seq);
/**
* @brief Checks if there are only IUPAC DNA characters in a
* character string.
*
* The accepted characters are A, T, G, C, U, R, Y, S, W, K, M, B, D, H, V
* and N, in upper or lower case, as well as '.' and '-'.
* The string is read up to its terminating null character, so it must be
* a valid, null-terminated string.
*
* @param seq The sequence to check.
*
* @returns A boolean value indicating if there are only
* IUPAC DNA characters in a character string.
* An empty string is reported as valid.
*
* @since May 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
bool only_IUPAC_DNA(const char* seq);
/**
* @brief Checks if a character string can be read as a DNA sequence encoded
* with ACGT or IUPAC characters (in capital letters or not).
*
* The string is first checked with only_ATGC(); if that check fails,
* it is checked with only_IUPAC_DNA().
*
* @param seq The sequence to check.
*
* @returns A boolean value indicating if the character string
* can be read as a DNA sequence.
*
* @since May 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
bool is_a_DNA_seq(const char* seq);
/**
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
*