Implemented functions to build reverse complement sequences

This commit is contained in:
Celine Mercier
2018-01-05 16:08:36 +01:00
parent 428c4eb5e6
commit 156fb04e88
5 changed files with 303 additions and 25 deletions

View File

@ -28,6 +28,140 @@
#define DEBUG_LEVEL 0 // TODO: has to be defined somewhere else (as a Cython compilation flag?)
/**************************************************************************
*
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
*
**************************************************************************/
/**
 * Internal function returning the complement of a nucleotide base.
 *
 * The base is looked up in DNA_ALPHA and the character found at the same
 * position in CDNA_ALPHA is returned.
 *
 * @warning The base must be in lower case.
 *
 * @param nucAc The nucleotide base.
 *
 * @returns The complement of the nucleotide base.
 * @retval nucAc The nucleotide base itself if it is not part of DNA_ALPHA.
 *
 * @since December 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @note Copied from ecoPCR source code
 */
static char nuc_base_complement(char nucAc);
/**
 * Internal function returning the complement of a nucleotide sequence.
 *
 * Every character of the sequence is replaced by its complement, as given
 * by nuc_base_complement(). The order of the characters is not changed.
 *
 * @warning The sequence must be in lower case.
 * @warning The sequence is complemented in place (it is not copied), so it
 *          must be a writable, NUL-terminated string.
 *
 * @param nucAcSeq The nucleotide sequence.
 *
 * @returns The pointer received in nucAcSeq, now pointing to the complemented sequence.
 *
 * @since December 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @note Copied from ecoPCR source code
 */
static char* nuc_seq_complement(char* nucAcSeq);
/**
 * Internal function returning the reverse of a nucleotide sequence.
 *
 * @warning The sequence must be in lower case.
 * @warning The sequence is reversed in place (it is not copied), so it must
 *          be a writable, NUL-terminated string.
 *
 * @param str The nucleotide sequence. If NULL, NULL is returned.
 * @param isPattern Whether the sequence is a pattern. If non-zero, the '#'
 *                  and '!' marker characters are repositioned after the
 *                  reversal: a '#' is moved behind the character that
 *                  follows it, and a '!' is moved in front of the character
 *                  that precedes it.
 *
 * @returns The pointer received in str, now pointing to the reversed sequence.
 *
 * @since December 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @note Copied from ecoPCR source code
 */
static char* reverse_sequence(char* str, char isPattern);
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
*
************************************************************************/
/*
 * Returns the complement of one lower-case nucleotide base.
 *
 * The base is searched in DNA_ALPHA; the character at the same index in
 * CDNA_ALPHA is its complement. A character that is not part of DNA_ALPHA
 * (including the NUL terminator) is returned unchanged.
 */
static char nuc_base_complement(char nucAc)
{
    /* Bind each alphabet to a single array object: two expansions of the same
     * string-literal macro are not guaranteed to share storage, so computing
     * an index as (strchr(DNA_ALPHA, x) - DNA_ALPHA) is not portable. */
    static const char dna_alphabet[] = DNA_ALPHA;
    static const char complement_alphabet[] = CDNA_ALPHA;
    const char *match;

    /* strchr() would match the terminator itself; never index with it. */
    if (nucAc == '\0')
        return nucAc;

    match = strchr(dna_alphabet, nucAc);
    if (match == NULL)
        return nucAc;

    return complement_alphabet[match - dna_alphabet];
}
/*
 * Complements a lower-case nucleotide sequence in place.
 *
 * Each character up to the NUL terminator is replaced by the value returned
 * by nuc_base_complement(). The order of the characters is left untouched.
 *
 * Returns the pointer received in nucAcSeq. A NULL pointer is returned as is,
 * matching the NULL handling of reverse_sequence(), with which this function
 * is chained.
 */
static char* nuc_seq_complement(char* nucAcSeq)
{
    char *cursor;

    if (nucAcSeq == NULL)
        return nucAcSeq;

    for (cursor = nucAcSeq; *cursor != '\0'; cursor++)
        *cursor = nuc_base_complement(*cursor);

    return nucAcSeq;
}
/*
 * Reverses a NUL-terminated sequence in place and returns the same pointer.
 *
 * str       : sequence to reverse; NULL is returned unchanged.
 * isPattern : when non-zero, the pattern markers are put back next to the
 *             character they were attached to before the reversal:
 *               - '#' must follow its character  ("#x"  becomes "x#"),
 *               - '!' must precede its character ("x!"  becomes "!x"),
 *               - both on the same character     ("#x!" becomes "!x#").
 *
 * Markers are handled as applying to exactly one neighbouring character.
 * A marker with no neighbour to attach to (a leading '!' or a trailing '#'
 * after the reversal, i.e. a malformed pattern) is left where it is instead
 * of reading or writing outside the string.
 */
static char* reverse_sequence(char* str, char isPattern)
{
    size_t len, i;
    char *front, *back, tmp;

    if (!str)
        return str;

    len = strlen(str);

    /* Nothing to swap or to reposition; also avoids forming the invalid
     * pointer (str - 1) for an empty string. */
    if (len < 2)
        return str;

    /* Plain in-place reversal. */
    front = str;
    back = str + len - 1;
    while (front < back)
    {
        tmp = *front;
        *front++ = *back;
        *back-- = tmp;
    }

    if (!isPattern)
        return str;

    /* Walk the whole string, last character included: a '!' can only end up
     * last when the original pattern started with it. */
    for (i = 0; i < len; i++)
    {
        if (str[i] == '#')
        {
            if ((i + 2 < len) && (str[i + 2] == '!'))
            {
                /* "#x!" -> "!x#": swap the two markers around the character. */
                str[i] = '!';
                i += 2;
                str[i] = '#';
            }
            else if (i + 1 < len)
            {
                /* "#x" -> "x#" */
                str[i] = str[i + 1];
                i++;
                str[i] = '#';
            }
            /* else: trailing '#' with no character after it, left untouched. */
        }
        else if ((str[i] == '!') && (i > 0))
        {
            /* "x!" -> "!x" */
            str[i] = str[i - 1];
            str[i - 1] = '!';
        }
    }

    return str;
}
/**********************************************************************
*
@ -404,3 +538,15 @@ loop: SWAPINIT(a, es);
/* qsort(pn - r, r / es, es, cmp);*/
}
/* Complements the pattern in place, then reverses it with the pattern
 * marker handling of reverse_sequence() switched on. */
char* reverse_complement_pattern(char* nucAcSeq)
{
    char *complemented = nuc_seq_complement(nucAcSeq);

    return reverse_sequence(complemented, 1);
}
/* Complements the sequence in place, then reverses it as a plain sequence
 * (no pattern marker handling). */
char* reverse_complement_sequence(char* nucAcSeq)
{
    char *complemented = nuc_seq_complement(nucAcSeq);

    return reverse_sequence(complemented, 0);
}

View File

@ -20,10 +20,16 @@
#include "obitypes.h"
#define FORMATTED_TIME_LENGTH (1024) /**< The length allocated for the character string containing a formatted date.
*/
#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< Evaluates to 1 if x is equal to 0, and to x otherwise (x may be evaluated twice).
*/
#define FORMATTED_TIME_LENGTH (1024) /**< The length allocated for the character string containing a formatted date.
*/
#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< Evaluates to 1 if x is equal to 0, and to x otherwise (x may be evaluated twice).
*/
#define DNA_ALPHA "acgtbdefhijklmnopqrsuvwxyz#![]" /**< DNA alphabet (IUPAC), in lower case, followed by the symbols '#', '!', '[' and ']'.
 * The complement of the character at a given index is the character at the same index in CDNA_ALPHA.
 * For reference, the same characters in alphabetical order and upper case: "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" */
#define CDNA_ALPHA "tgcavhefdijmlknopqysabwxrz#!][" /**< Complementary DNA alphabet (IUPAC), index-aligned with DNA_ALPHA.
 * '#' and '!' map to themselves, while '[' and ']' are exchanged (presumably so that brackets are well ordered again once the sequence is reversed).
 * For reference, the complements of the alphabetically ordered upper case characters: "TVGHEFCDIJMLKNOPQYSAABWXRZ#!][" */
/**
* @brief Copy the content of a file into another file.
@ -155,4 +161,38 @@ void* bsearch_user_data(const void* key, const void* base, size_t num, size_t si
void qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *));
/**
 * Function returning the reverse complement of a nucleotide sequence.
 *
 * The sequence is first complemented character by character, then reversed.
 *
 * @warning The sequence must be in lower case.
 * @warning The sequence is replaced by its reverse complement in place (it is
 *          not copied), so it must be a writable, NUL-terminated string.
 *
 * @param nucAcSeq The nucleotide sequence.
 *
 * @returns The pointer received in nucAcSeq, now pointing to the reverse complemented sequence.
 *
 * @since December 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @note Copied from ecoPCR source code
 */
char* reverse_complement_sequence(char* nucAcSeq);
/**
 * Function returning the reverse complement of a pattern.
 *
 * The pattern is first complemented character by character, then reversed
 * with the repositioning of the '#' and '!' pattern markers enabled.
 *
 * @warning The pattern must be in lower case.
 * @warning The pattern is replaced by its reverse complement in place (it is
 *          not copied), so it must be a writable, NUL-terminated string.
 *
 * @param nucAcSeq The pattern.
 *
 * @returns The pointer received in nucAcSeq, now pointing to the reverse complemented pattern.
 *
 * @since December 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @note Copied from ecoPCR source code
 */
char* reverse_complement_pattern(char* nucAcSeq);
#endif /* UTILS_H_ */