Implemented functions to build reverse complement sequences

This commit is contained in:
Celine Mercier
2018-01-05 16:08:36 +01:00
parent 428c4eb5e6
commit 156fb04e88
5 changed files with 303 additions and 25 deletions

View File

@ -9,4 +9,4 @@ from ..capi.obitypes cimport const_char_p
cdef extern from "utils.h" nogil:
const_char_p obi_format_date(time_t date)
char* reverse_complement_sequence(char* nucAcSeq)

View File

@ -8,9 +8,10 @@ cdef class Seq(dict) :
cdef class Nuc_Seq(Seq) :
cdef object _quality
#cpdef object reverse_complement(self)
cdef char* _reverse_complement
cdef object _quality_array
cpdef bytes build_reverse_complement(self)
cdef class Seq_Stored(Line) :
@ -18,7 +19,8 @@ cdef class Seq_Stored(Line) :
cdef class Nuc_Seq_Stored(Seq_Stored) :
cdef char* _reverse_complement
cpdef set_quality_int(self, list new_qual)
cpdef set_quality_char(self, bytes new_qual, int offset=*)
#cpdef object reverse_complement(self)
cpdef bytes build_reverse_complement(self)

View File

@ -2,12 +2,23 @@
from obitools3.utils cimport bytes2str, str2bytes, tostr, tobytes
from obitools3.dms.view.view cimport View
from .capi.obitypes cimport index_t
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
QUALITY_COLUMN, \
COUNT_COLUMN
from .capi.obiutils cimport reverse_complement_sequence
from cpython cimport array
import array
from copy import deepcopy
cdef class Seq(dict) :
def __init__(self, object id, object seq, object definition=None, object quality=None, int offset=-1, object tags=None) :
@ -23,11 +34,18 @@ cdef class Seq(dict) :
k_b = tobytes(k)
self[k_b] = tags[k]
@staticmethod
def new_from_stored(Seq_Stored seq_to_clone) :
cdef Seq new_seq
new_seq = Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
return new_seq
def __contains__(self, object key):
return dict.__contains__(self, tobytes(key))
def __str__(self):
return tostr(self[NUC_SEQUENCE_COLUMN])
# sequence id property getter and setter
@property
def id(self): # @ReservedAssignment
@ -66,7 +84,31 @@ cdef class Seq(dict) :
cdef class Nuc_Seq(Seq) :
def __init__(self, object id, object seq, object definition=None, object quality=None, int offset=-1, object tags=None) :
cdef object k
cdef bytes k_b
self[ID_COLUMN] = tobytes(id)
self.seq = tobytes(seq)
if definition is not None :
self.definition = tobytes(definition)
if quality is not None:
self.quality = quality
if type(quality) == list: # TODO discuss
self._quality_array = array.array('d', quality)
self._annotations = {}
self._reverse_complement = NULL # TODO unnecessary?
if tags is not None:
for k in tags:
k_b = tobytes(k)
self[k_b] = tags[k]
@staticmethod
def new_from_stored(Nuc_Seq_Stored seq_to_clone) :
cdef Nuc_Seq new_seq
new_seq = Nuc_Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
return new_seq
# nuc sequence property getter and setter
@property
def seq(self):
@ -83,10 +125,41 @@ cdef class Nuc_Seq(Seq) :
@quality.setter
def quality(self, object new_quality): # @DuplicatedSignature
self[QUALITY_COLUMN] = tobytes(new_quality)
self[QUALITY_COLUMN] = new_quality
# sequence quality array property getter and setter
@property
def quality_array(self):
return self._quality_array
# reverse complement property getter
@property
def reverse_complement(self):
cdef bytes rev_comp
if self._reverse_complement == NULL:
rev_comp = self.build_reverse_complement()
self._reverse_complement = rev_comp
return self._reverse_complement
def __str__(self):
return tostr(self[NUC_SEQUENCE_COLUMN])
cpdef bytes build_reverse_complement(self) :
cdef bytes rev_comp
rev_comp = deepcopy(self[NUC_SEQUENCE_COLUMN]).lower()
return <bytes>reverse_complement_sequence(rev_comp)
# TODO DISCUSS
# sequence quality (character string format) property getter and setter
# @property
# def quality_str(self):
# return self[QUALITY_COLUMN]
#
# @quality.setter
# def quality_str(self, list new_quality): # @DuplicatedSignature
# self[QUALITY_COLUMN] = new_quality
# cpdef str reverse_complement(self) : TODO in C ?
# pass
cdef class Seq_Stored(Line) :
@ -112,19 +185,23 @@ cdef class Seq_Stored(Line) :
cdef class Nuc_Seq_Stored(Seq_Stored) :
def __init__(self, View view, index_t line_nb) :
self = Seq_Stored.__init__(self, view, line_nb)
self._reverse_complement = NULL # TODO unnecessary?
cpdef set_quality_int(self, list new_qual):
self._view.get_column(QUALITY_COLUMN).set_line(self.index, new_qual)
cpdef set_quality_char(self, bytes new_qual, int offset=-1):
self._view.get_column(QUALITY_COLUMN).set_str_line(self.index, new_qual, offset=offset)
# nuc_seq property getter and setter
# seq property getter and setter
@property
def nuc_seq(self):
def seq(self):
return self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index)
@nuc_seq.setter
def nuc_seq(self, object new_seq): # @DuplicatedSignature
@seq.setter
def seq(self, object new_seq): # @DuplicatedSignature
self._view.get_column(NUC_SEQUENCE_COLUMN).set_line(self.index, tobytes(new_seq))
# quality property getter and setter
@ -144,16 +221,29 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
# quality character string property getter
@property
def quality_str(self):
return self._view.get_column(QUALITY_COLUMN).get_str_line(self._index)
return self._view.get_column(QUALITY_COLUMN).get_str_line(self._index)
# reverse complement property getter
@property
def reverse_complement(self):
cdef bytes rev_comp
if self._reverse_complement == NULL:
rev_comp = self.build_reverse_complement()
self._reverse_complement = rev_comp
return self._reverse_complement
def __str__(self):
return bytes2str(self.nuc_seq)
return tostr(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
def __len__(self):
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
cpdef bytes build_reverse_complement(self) :
cdef bytes rev_comp
rev_comp = deepcopy(self[NUC_SEQUENCE_COLUMN]).lower()
return <bytes>reverse_complement_sequence(rev_comp)
# cpdef str reverse_complement(self) : TODO in C ?
# pass
# TODO static method to import OBI_Nuc_Seq to OBI_Nuc_Seq_Stored ?