Implemented functions to build reverse complement sequences
This commit is contained in:
146
src/utils.c
146
src/utils.c
@ -28,6 +28,140 @@
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Internal function returning the complement of a nucleotide base.
|
||||
*
|
||||
* @warning The base must be in lower case.
|
||||
*
|
||||
* @param nucAc The nucleotide base.
|
||||
*
|
||||
* @returns The complement of the nucleotide base.
|
||||
* @retval The nucleotide base itself if no complement was found.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
static char nuc_base_complement(char nucAc);
|
||||
|
||||
|
||||
/**
|
||||
* Internal function returning the complement of a nucleotide sequence.
|
||||
*
|
||||
* @warning The sequence must be in lower case.
|
||||
* @warning The sequence will be replaced by its complement without being copied.
|
||||
*
|
||||
* @param nucAcSeq The nucleotide sequence.
|
||||
*
|
||||
* @returns The complemented sequence.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
static char* nuc_seq_complement(char* nucAcSeq);
|
||||
|
||||
|
||||
/**
|
||||
* Internal function returning the reverse of a nucleotide sequence.
|
||||
*
|
||||
* @warning The sequence must be in lower case.
|
||||
* @warning The sequence will be replaced by its reverse without being copied.
|
||||
*
|
||||
* @param str The nucleotide sequence.
|
||||
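* @param isPattern Non-zero if the sequence is a pattern, in which case the '#' and '!' markers are moved back next to their base after the reversal; zero for a plain sequence.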
|
||||
*
|
||||
* @returns The reversed sequence.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
static char* reverse_sequence(char* str, char isPattern);
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
*
|
||||
************************************************************************/
|
||||
|
||||
static char nuc_base_complement(char nucAc)
|
||||
{
|
||||
char* c;
|
||||
|
||||
if ((c = strchr(DNA_ALPHA, nucAc)))
|
||||
return CDNA_ALPHA[(c - DNA_ALPHA)];
|
||||
else
|
||||
return nucAc;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Complements a nucleotide sequence in place.
 *
 * Every character up to the terminating '\0' is replaced by the value
 * returned by nuc_base_complement(). The buffer is modified directly; no copy
 * is made.
 *
 * Returns the pointer that was passed in. A NULL pointer is returned
 * untouched, matching the NULL handling of reverse_sequence().
 */
static char* nuc_seq_complement(char* nucAcSeq)
{
	char* cursor;

	if (!nucAcSeq)
		return nucAcSeq;

	for (cursor = nucAcSeq; *cursor != '\0'; cursor++)
		*cursor = nuc_base_complement(*cursor);

	return nucAcSeq;
}
|
||||
|
||||
|
||||
/*
 * Reverses a character string in place.
 *
 * When isPattern is non-zero, a second pass repairs the pattern markers that
 * the plain reversal left on the wrong side of their base:
 *   - "#x!" becomes "!x#"  ('!' prefix and '#' suffix on the same base),
 *   - "#x"  becomes "x#"   ('#' goes back after its base),
 *   - "x!"  becomes "!x"   ('!' goes back in front of its base).
 * A marker that has no neighbouring base to attach to (a '#' in last position
 * or a '!' in first position after the reversal) is left where it is, so the
 * function never reads or writes outside the string.
 *
 * Returns str. NULL, empty and one-character strings are returned unchanged.
 */
static char* reverse_sequence(char* str, char isPattern)
{
	size_t len;
	size_t last;
	size_t head;
	size_t tail;
	size_t i;
	char   tmp;

	if (!str)
		return str;

	len = strlen(str);
	if (len < 2)
		return str;	/* nothing to swap; also avoids computing str - 1 on "" */

	last = len - 1;

	/* Plain in-place reversal. */
	for (head = 0, tail = last; head < tail; head++, tail--)
	{
		tmp       = str[head];
		str[head] = str[tail];
		str[tail] = tmp;
	}

	if (!isPattern)
		return str;

	/* The last character is visited too: a pattern that started with '!'
	 * has its '!' in last position after the reversal. */
	for (i = 0; i <= last; i++)
	{
		if (str[i] == '#')
		{
			/* i <= last always holds here, so last - i cannot wrap. */
			if (((last - i) >= 2) && (str[i + 2] == '!'))
			{
				str[i] = '!';
				i += 2;
				str[i] = '#';
			}
			else if (i < last)
			{
				str[i] = str[i + 1];
				i++;
				str[i] = '#';
			}
		}
		else if ((str[i] == '!') && (i > 0))
		{
			str[i]     = str[i - 1];
			str[i - 1] = '!';
		}
	}

	return str;
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
@ -404,3 +538,15 @@ loop: SWAPINIT(a, es);
|
||||
/* qsort(pn - r, r / es, es, cmp);*/
|
||||
}
|
||||
|
||||
|
||||
/*
 * Reverse-complements a pattern in place: the buffer is first complemented
 * with nuc_seq_complement(), then reversed with reverse_sequence() in pattern
 * mode (isPattern = 1). Returns the value returned by reverse_sequence().
 */
char* reverse_complement_pattern(char* nucAcSeq)
{
	char* complemented;

	complemented = nuc_seq_complement(nucAcSeq);

	return reverse_sequence(complemented, 1);
}
|
||||
|
||||
|
||||
/*
 * Reverse-complements a nucleotide sequence in place: the buffer is first
 * complemented with nuc_seq_complement(), then reversed with
 * reverse_sequence() in plain mode (isPattern = 0). Returns the value
 * returned by reverse_sequence().
 */
char* reverse_complement_sequence(char* nucAcSeq)
{
	char* complemented;

	complemented = nuc_seq_complement(nucAcSeq);

	return reverse_sequence(complemented, 0);
}
|
||||
|
||||
|
48
src/utils.h
48
src/utils.h
@ -20,10 +20,16 @@
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
#define FORMATTED_TIME_LENGTH (1024) /**< The length allocated for the character string containing a formatted date.
|
||||
*/
|
||||
#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< If x is equal to 0, x takes the value 1.
|
||||
*/
|
||||
#define FORMATTED_TIME_LENGTH (1024) /**< The length allocated for the character string containing a formatted date.
|
||||
*/
|
||||
#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< If x is equal to 0, x takes the value 1.
|
||||
*/
|
||||
#define DNA_ALPHA "acgtbdefhijklmnopqrsuvwxyz#![]" /**< DNA alphabet (IUPAC).
|
||||
//"ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" */
|
||||
#define CDNA_ALPHA "tgcavhefdijmlknopqysabwxrz#!][" /**< Complementary DNA alphabet (IUPAC).
|
||||
//"TVGHEFCDIJMLKNOPQYSAABWXRZ#!][" */
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Copy the content of a file into another file.
|
||||
@ -155,4 +161,38 @@ void* bsearch_user_data(const void* key, const void* base, size_t num, size_t si
|
||||
void qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *));
|
||||
|
||||
|
||||
/**
|
||||
* Function returning the reverse complement of a nucleotide sequence.
|
||||
*
|
||||
* @warning The sequence must be in lower case.
|
||||
* @warning The sequence will be replaced by its reverse complement without being copied.
|
||||
*
|
||||
* @param nucAcSeq The nucleotide sequence.
|
||||
*
|
||||
* @returns The reverse complemented sequence.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
char* reverse_complement_sequence(char* nucAcSeq);
|
||||
|
||||
|
||||
/**
|
||||
* Function returning the reverse complement of a pattern.
|
||||
*
|
||||
* @warning The pattern must be in lower case.
|
||||
* @warning The pattern will be replaced by its reverse complement without being copied.
|
||||
*
|
||||
* @param nucAcSeq The pattern.
|
||||
*
|
||||
* @returns The reverse complemented pattern.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
char* reverse_complement_pattern(char* nucAcSeq);
|
||||
|
||||
|
||||
#endif /* UTILS_H_ */
|
||||
|
Reference in New Issue
Block a user