Implemented functions to build reverse complement sequences
This commit is contained in:
@ -9,4 +9,4 @@ from ..capi.obitypes cimport const_char_p
|
||||
cdef extern from "utils.h" nogil:
|
||||
|
||||
const_char_p obi_format_date(time_t date)
|
||||
|
||||
char* reverse_complement_sequence(char* nucAcSeq)
|
||||
|
@ -9,8 +9,9 @@ cdef class Seq(dict) :
|
||||
|
||||
cdef class Nuc_Seq(Seq) :
|
||||
|
||||
cdef object _quality
|
||||
#cpdef object reverse_complement(self)
|
||||
cdef char* _reverse_complement
|
||||
cdef object _quality_array
|
||||
cpdef bytes build_reverse_complement(self)
|
||||
|
||||
|
||||
cdef class Seq_Stored(Line) :
|
||||
@ -19,6 +20,7 @@ cdef class Seq_Stored(Line) :
|
||||
|
||||
cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
|
||||
cdef char* _reverse_complement
|
||||
cpdef set_quality_int(self, list new_qual)
|
||||
cpdef set_quality_char(self, bytes new_qual, int offset=*)
|
||||
#cpdef object reverse_complement(self)
|
||||
cpdef bytes build_reverse_complement(self)
|
@ -2,12 +2,23 @@
|
||||
|
||||
from obitools3.utils cimport bytes2str, str2bytes, tostr, tobytes
|
||||
|
||||
from obitools3.dms.view.view cimport View
|
||||
|
||||
from .capi.obitypes cimport index_t
|
||||
|
||||
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
ID_COLUMN, \
|
||||
DEFINITION_COLUMN, \
|
||||
QUALITY_COLUMN, \
|
||||
COUNT_COLUMN
|
||||
|
||||
from .capi.obiutils cimport reverse_complement_sequence
|
||||
|
||||
from cpython cimport array
|
||||
import array
|
||||
|
||||
from copy import deepcopy
|
||||
|
||||
|
||||
cdef class Seq(dict) :
|
||||
def __init__(self, object id, object seq, object definition=None, object quality=None, int offset=-1, object tags=None) :
|
||||
@ -23,10 +34,17 @@ cdef class Seq(dict) :
|
||||
k_b = tobytes(k)
|
||||
self[k_b] = tags[k]
|
||||
|
||||
@staticmethod
|
||||
def new_from_stored(Seq_Stored seq_to_clone) :
|
||||
cdef Seq new_seq
|
||||
new_seq = Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
|
||||
return new_seq
|
||||
|
||||
def __contains__(self, object key):
|
||||
return dict.__contains__(self, tobytes(key))
|
||||
|
||||
def __str__(self):
|
||||
return tostr(self[NUC_SEQUENCE_COLUMN])
|
||||
|
||||
# sequence id property getter and setter
|
||||
@property
|
||||
@ -67,6 +85,30 @@ cdef class Seq(dict) :
|
||||
|
||||
cdef class Nuc_Seq(Seq) :
|
||||
|
||||
def __init__(self, object id, object seq, object definition=None, object quality=None, int offset=-1, object tags=None) :
|
||||
cdef object k
|
||||
cdef bytes k_b
|
||||
self[ID_COLUMN] = tobytes(id)
|
||||
self.seq = tobytes(seq)
|
||||
if definition is not None :
|
||||
self.definition = tobytes(definition)
|
||||
if quality is not None:
|
||||
self.quality = quality
|
||||
if type(quality) == list: # TODO discuss
|
||||
self._quality_array = array.array('d', quality)
|
||||
self._annotations = {}
|
||||
self._reverse_complement = NULL # TODO unnecessary?
|
||||
if tags is not None:
|
||||
for k in tags:
|
||||
k_b = tobytes(k)
|
||||
self[k_b] = tags[k]
|
||||
|
||||
@staticmethod
|
||||
def new_from_stored(Nuc_Seq_Stored seq_to_clone) :
|
||||
cdef Nuc_Seq new_seq
|
||||
new_seq = Nuc_Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
|
||||
return new_seq
|
||||
|
||||
# nuc sequence property getter and setter
|
||||
@property
|
||||
def seq(self):
|
||||
@ -83,10 +125,41 @@ cdef class Nuc_Seq(Seq) :
|
||||
|
||||
@quality.setter
|
||||
def quality(self, object new_quality): # @DuplicatedSignature
|
||||
self[QUALITY_COLUMN] = tobytes(new_quality)
|
||||
self[QUALITY_COLUMN] = new_quality
|
||||
|
||||
# sequence quality array property getter and setter
|
||||
@property
|
||||
def quality_array(self):
|
||||
return self._quality_array
|
||||
|
||||
# reverse complement property getter
|
||||
@property
|
||||
def reverse_complement(self):
|
||||
cdef bytes rev_comp
|
||||
if self._reverse_complement == NULL:
|
||||
rev_comp = self.build_reverse_complement()
|
||||
self._reverse_complement = rev_comp
|
||||
return self._reverse_complement
|
||||
|
||||
def __str__(self):
|
||||
return tostr(self[NUC_SEQUENCE_COLUMN])
|
||||
|
||||
cpdef bytes build_reverse_complement(self) :
|
||||
cdef bytes rev_comp
|
||||
rev_comp = deepcopy(self[NUC_SEQUENCE_COLUMN]).lower()
|
||||
return <bytes>reverse_complement_sequence(rev_comp)
|
||||
|
||||
|
||||
# TODO DISCUSS
|
||||
# sequence quality (character string format) property getter and setter
|
||||
# @property
|
||||
# def quality_str(self):
|
||||
# return self[QUALITY_COLUMN]
|
||||
#
|
||||
# @quality.setter
|
||||
# def quality_str(self, list new_quality): # @DuplicatedSignature
|
||||
# self[QUALITY_COLUMN] = new_quality
|
||||
|
||||
# cpdef str reverse_complement(self) : TODO in C ?
|
||||
# pass
|
||||
|
||||
|
||||
cdef class Seq_Stored(Line) :
|
||||
@ -112,19 +185,23 @@ cdef class Seq_Stored(Line) :
|
||||
|
||||
cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
|
||||
def __init__(self, View view, index_t line_nb) :
|
||||
self = Seq_Stored.__init__(self, view, line_nb)
|
||||
self._reverse_complement = NULL # TODO unnecessary?
|
||||
|
||||
cpdef set_quality_int(self, list new_qual):
|
||||
self._view.get_column(QUALITY_COLUMN).set_line(self.index, new_qual)
|
||||
|
||||
cpdef set_quality_char(self, bytes new_qual, int offset=-1):
|
||||
self._view.get_column(QUALITY_COLUMN).set_str_line(self.index, new_qual, offset=offset)
|
||||
|
||||
# nuc_seq property getter and setter
|
||||
# seq property getter and setter
|
||||
@property
|
||||
def nuc_seq(self):
|
||||
def seq(self):
|
||||
return self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index)
|
||||
|
||||
@nuc_seq.setter
|
||||
def nuc_seq(self, object new_seq): # @DuplicatedSignature
|
||||
@seq.setter
|
||||
def seq(self, object new_seq): # @DuplicatedSignature
|
||||
self._view.get_column(NUC_SEQUENCE_COLUMN).set_line(self.index, tobytes(new_seq))
|
||||
|
||||
# quality property getter and setter
|
||||
@ -146,13 +223,26 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
def quality_str(self):
|
||||
return self._view.get_column(QUALITY_COLUMN).get_str_line(self._index)
|
||||
|
||||
# reverse complement property getter
|
||||
@property
|
||||
def reverse_complement(self):
|
||||
cdef bytes rev_comp
|
||||
if self._reverse_complement == NULL:
|
||||
rev_comp = self.build_reverse_complement()
|
||||
self._reverse_complement = rev_comp
|
||||
return self._reverse_complement
|
||||
|
||||
def __str__(self):
|
||||
return bytes2str(self.nuc_seq)
|
||||
return tostr(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
|
||||
|
||||
def __len__(self):
|
||||
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
|
||||
|
||||
cpdef bytes build_reverse_complement(self) :
|
||||
cdef bytes rev_comp
|
||||
rev_comp = deepcopy(self[NUC_SEQUENCE_COLUMN]).lower()
|
||||
return <bytes>reverse_complement_sequence(rev_comp)
|
||||
|
||||
# cpdef str reverse_complement(self) : TODO in C ?
|
||||
# pass
|
||||
|
||||
# TODO static method to import OBI_Nuc_Seq to OBI_Nuc_Seq_Stored ?
|
||||
|
||||
|
146
src/utils.c
146
src/utils.c
@ -28,6 +28,140 @@
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Internal function returning the complement of a nucleotide base.
|
||||
*
|
||||
* @warning The base must be in lower case.
|
||||
*
|
||||
* @param nucAc The nucleotide base.
|
||||
*
|
||||
* @returns The complement of the nucleotide base.
|
||||
* @retval The nucleotide base itself if no complement was found.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
static char nuc_base_complement(char nucAc);
|
||||
|
||||
|
||||
/**
|
||||
* Internal function returning the complement of a nucleotide sequence.
|
||||
*
|
||||
* @warning The sequence must be in lower case.
|
||||
* @warning The sequence will be replaced by its complement without being copied.
|
||||
*
|
||||
* @param nucAcSeq The nucleotide sequence.
|
||||
*
|
||||
* @returns The complemented sequence.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
static char* nuc_seq_complement(char* nucAcSeq);
|
||||
|
||||
|
||||
/**
|
||||
* Internal function returning the reverse of a nucleotide sequence.
|
||||
*
|
||||
* @warning The sequence must be in lower case.
|
||||
* @warning The sequence will be replaced by its reverse without being copied.
|
||||
*
|
||||
* @param str The nucleotide sequence.
|
||||
* @param isPattern Whether the sequence is a pattern. TODO
|
||||
*
|
||||
* @returns The reversed sequence.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
static char* reverse_sequence(char* str, char isPattern);
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
*
|
||||
************************************************************************/
|
||||
|
||||
static char nuc_base_complement(char nucAc)
|
||||
{
|
||||
char* c;
|
||||
|
||||
if ((c = strchr(DNA_ALPHA, nucAc)))
|
||||
return CDNA_ALPHA[(c - DNA_ALPHA)];
|
||||
else
|
||||
return nucAc;
|
||||
}
|
||||
|
||||
|
||||
static char* nuc_seq_complement(char* nucAcSeq)
|
||||
{
|
||||
char *s;
|
||||
|
||||
for (s = nucAcSeq ; *s ; s++)
|
||||
*s = nuc_base_complement(*s);
|
||||
|
||||
return nucAcSeq;
|
||||
}
|
||||
|
||||
|
||||
static char* reverse_sequence(char* str, char isPattern)
|
||||
{
|
||||
char *sb, *se, c;
|
||||
|
||||
if (! str)
|
||||
return str;
|
||||
|
||||
sb = str;
|
||||
se = str + strlen(str) - 1;
|
||||
|
||||
while(sb <= se) {
|
||||
c = *sb;
|
||||
*sb++ = *se;
|
||||
*se-- = c;
|
||||
}
|
||||
|
||||
sb = str;
|
||||
se = str + strlen(str) - 1;
|
||||
|
||||
if (isPattern)
|
||||
for (;sb < se; sb++)
|
||||
{
|
||||
if (*sb=='#')
|
||||
{
|
||||
if (((se - sb) > 2) && (*(sb+2)=='!'))
|
||||
{
|
||||
*sb='!';
|
||||
sb+=2;
|
||||
*sb='#';
|
||||
}
|
||||
else
|
||||
{
|
||||
*sb=*(sb+1);
|
||||
sb++;
|
||||
*sb='#';
|
||||
}
|
||||
}
|
||||
else if (*sb=='!')
|
||||
{
|
||||
*sb=*(sb-1);
|
||||
*(sb-1)='!';
|
||||
}
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
@ -404,3 +538,15 @@ loop: SWAPINIT(a, es);
|
||||
/* qsort(pn - r, r / es, es, cmp);*/
|
||||
}
|
||||
|
||||
|
||||
char* reverse_complement_pattern(char* nucAcSeq)
|
||||
{
|
||||
return reverse_sequence(nuc_seq_complement(nucAcSeq), 1);
|
||||
}
|
||||
|
||||
|
||||
char* reverse_complement_sequence(char* nucAcSeq)
|
||||
{
|
||||
return reverse_sequence(nuc_seq_complement(nucAcSeq), 0);
|
||||
}
|
||||
|
||||
|
40
src/utils.h
40
src/utils.h
@ -24,6 +24,12 @@
|
||||
*/
|
||||
#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< If x is equal to 0, x takes the value 1.
|
||||
*/
|
||||
#define DNA_ALPHA "acgtbdefhijklmnopqrsuvwxyz#![]" /**< DNA alphabet (IUPAC).
|
||||
//"ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" */
|
||||
#define CDNA_ALPHA "tgcavhefdijmlknopqysabwxrz#!][" /**< Complementary DNA alphabet (IUPAC).
|
||||
//"TVGHEFCDIJMLKNOPQYSAABWXRZ#!][" */
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Copy the content of a file into another file.
|
||||
@ -155,4 +161,38 @@ void* bsearch_user_data(const void* key, const void* base, size_t num, size_t si
|
||||
void qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *));
|
||||
|
||||
|
||||
/**
|
||||
* Function returning the reverse complement of a nucleotide sequence.
|
||||
*
|
||||
* @warning The sequence must be in lower case.
|
||||
* @warning The sequence will be replaced by its reverse complement without being copied.
|
||||
*
|
||||
* @param nucAcSeq The nucleotide sequence.
|
||||
*
|
||||
* @returns The reverse complemented sequence.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
char* reverse_complement_sequence(char* nucAcSeq);
|
||||
|
||||
|
||||
/**
|
||||
* Function returning the reverse complement of a pattern.
|
||||
*
|
||||
* @warning The pattern must be in lower case.
|
||||
* @warning The pattern will be replaced by its reverse complement without being copied.
|
||||
*
|
||||
* @param nucAcSeq The pattern.
|
||||
*
|
||||
* @returns The reverse complemented pattern.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @note Copied from ecoPCR source code
|
||||
*/
|
||||
char* reverse_complement_pattern(char* nucAcSeq);
|
||||
|
||||
|
||||
#endif /* UTILS_H_ */
|
||||
|
Reference in New Issue
Block a user