Cython API: Seq classes reworked

This commit is contained in:
Celine Mercier
2017-07-27 19:39:58 +02:00
parent 0655063bb0
commit 75f691d55a
2 changed files with 82 additions and 50 deletions

View File

@ -4,10 +4,7 @@ from .view.view cimport Line
cdef class Seq(dict) : cdef class Seq(dict) :
cdef dict _annotations
cdef str _id
cdef object _seq
cdef str _definition
cdef class Nuc_Seq(Seq) : cdef class Nuc_Seq(Seq) :
@ -16,7 +13,12 @@ cdef class Nuc_Seq(Seq) :
#cpdef object reverse_complement(self) #cpdef object reverse_complement(self)
cdef class Nuc_Seq_Stored(Line) : cdef class Seq_Stored(Line) :
pass
cpdef object get_str_quality(self)
cdef class Nuc_Seq_Stored(Seq_Stored) :
cpdef set_quality_int(self, list new_qual)
cpdef set_quality_char(self, bytes new_qual, int offset=*)
#cpdef object reverse_complement(self) #cpdef object reverse_complement(self)

View File

@ -1,55 +1,68 @@
#cython: language_level=3 #cython: language_level=3
from obitools3.utils cimport bytes2str, str2bytes from obitools3.utils cimport bytes2str, str2bytes, tostr, tobytes
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \ from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \ ID_COLUMN, \
DEFINITION_COLUMN, \ DEFINITION_COLUMN, \
QUALITY_COLUMN QUALITY_COLUMN, \
COUNT_COLUMN
NUC_SEQUENCE_COLUMN_str = bytes2str(NUC_SEQUENCE_COLUMN)
ID_COLUMN_str = bytes2str(ID_COLUMN)
DEFINITION_COLUMN_str = bytes2str(DEFINITION_COLUMN)
QUALITY_COLUMN_str = bytes2str(QUALITY_COLUMN)
cdef class Seq(dict) : cdef class Seq(dict) :
def __init__(self, str id, object seq, object definition=None) : def __init__(self, object id, object seq, object definition=None, object quality=None, int offset=-1, object tags=None) :
self.id = id cdef object k
self.seq = seq cdef bytes k_b
self[ID_COLUMN] = tobytes(id)
self.seq = tobytes(seq)
if definition is not None : if definition is not None :
self.definition = definition self.definition = tobytes(definition)
self._annotations = {}
if tags is not None :
for k in tags:
k_b = tobytes(k)
self[k_b] = tags[k_b]
def __contains__(self, object key):
return dict.__contains__(self, tobytes(key))
# sequence id property getter and setter # sequence id property getter and setter
@property @property
def id(self): # @ReservedAssignment def id(self): # @ReservedAssignment
return self._id return self[ID_COLUMN]
@id.setter @id.setter
def id(self, str new_id): # @ReservedAssignment @DuplicatedSignature def id(self, object new_id): # @ReservedAssignment @DuplicatedSignature
self._id = new_id self[ID_COLUMN] = tobytes(new_id)
self[ID_COLUMN] = new_id
# sequence annotations property getter and setter
@property
def annotations(self): # @ReservedAssignment
return self._annotations
@annotations.setter
def annotations(self, object annotations): # @ReservedAssignment @DuplicatedSignature
self._annotations = annotations
# sequence property getter and setter # sequence property getter and setter
@property @property
def seq(self): def seq(self):
return self._seq return self[b"SEQ"]
@seq.setter @seq.setter
def seq(self, object new_seq): # @DuplicatedSignature def seq(self, object new_seq): # @DuplicatedSignature
self._seq = new_seq self[b"SEQ"] = tobytes(new_seq)
self["SEQ"] = new_seq # TODO discuss
# sequence definition property getter and setter # sequence definition property getter and setter
@property @property
def definition(self): def definition(self):
return self._definition return self[DEFINITION_COLUMN]
@definition.setter @definition.setter
def definition(self, object new_definition): # @DuplicatedSignature def definition(self, object new_definition): # @DuplicatedSignature
self._definition = new_definition self[DEFINITION_COLUMN] = tobytes(new_definition)
self[DEFINITION_COLUMN_str] = new_definition
cdef class Nuc_Seq(Seq) : cdef class Nuc_Seq(Seq) :
@ -62,65 +75,82 @@ cdef class Nuc_Seq(Seq) :
@seq.setter @seq.setter
def seq(self, object new_seq): # @DuplicatedSignature def seq(self, object new_seq): # @DuplicatedSignature
self._seq = new_seq self._seq = new_seq
self[NUC_SEQUENCE_COLUMN_str] = new_seq self[NUC_SEQUENCE_COLUMN] = tobytes(new_seq)
# sequence quality property getter and setter # sequence quality property getter and setter
@property @property
def quality(self): def quality(self):
return self._quality return self[QUALITY_COLUMN]
@quality.setter @quality.setter
def quality(self, object new_quality): # @DuplicatedSignature def quality(self, object new_quality): # @DuplicatedSignature
self._quality = new_quality self[QUALITY_COLUMN] = tobytes(new_quality)
self[QUALITY_COLUMN_str] = new_quality
# cpdef str reverse_complement(self) : TODO in C ? # cpdef str reverse_complement(self) : TODO in C ?
# pass # pass
cdef class Nuc_Seq_Stored(Line) : cdef class Seq_Stored(Line) :
# sequence id property getter and setter # sequence id property getter and setter
@property @property
def id(self): # @ReservedAssignment @DuplicatedSignature def id(self): # @ReservedAssignment @DuplicatedSignature
return self[ID_COLUMN_str] return self._view.get_column(ID_COLUMN).get_line(self.index)
@id.setter @id.setter
def id(self, str new_id): # @ReservedAssignment @DuplicatedSignature def id(self, object new_id): # @ReservedAssignment @DuplicatedSignature
self[ID_COLUMN_str] = new_id self._view.get_column(ID_COLUMN).set_line(self._index, tobytes(new_id))
# sequence definition property getter and setter # sequence definition property getter and setter
@property @property
def definition(self): def definition(self):
return self[DEFINITION_COLUMN_str] return self._view.get_column(DEFINITION_COLUMN).get_line(self.index)
@definition.setter @definition.setter
def definition(self, str new_def): # @DuplicatedSignature def definition(self, object new_def): # @DuplicatedSignature
self[DEFINITION_COLUMN_str] = new_def self._view.get_column(DEFINITION_COLUMN).set_line(self._index, tobytes(new_def))
cdef class Nuc_Seq_Stored(Seq_Stored) :
cpdef set_quality_int(self, list new_qual):
self._view.get_column(QUALITY_COLUMN).set_line(self.index, new_qual)
cpdef set_quality_char(self, bytes new_qual, int offset=-1):
self._view.get_column(QUALITY_COLUMN).set_str_line(self.index, new_qual, offset=offset)
# nuc_seq property getter and setter # nuc_seq property getter and setter
@property @property
def nuc_seq(self): def nuc_seq(self):
return self[NUC_SEQUENCE_COLUMN_str] return self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index)
@nuc_seq.setter @nuc_seq.setter
def nuc_seq(self, object new_seq): # @DuplicatedSignature def nuc_seq(self, object new_seq): # @DuplicatedSignature
self[NUC_SEQUENCE_COLUMN_str] = new_seq self._view.get_column(NUC_SEQUENCE_COLUMN).set_line(self.index, tobytes(new_seq))
# quality property getter and setter # quality property getter and setter
@property @property
def quality(self): def quality(self):
return self[QUALITY_COLUMN_str] return self._view.get_column(QUALITY_COLUMN).get_line(self.index)
@quality.setter @quality.setter
def quality(self, object new_qual): # @DuplicatedSignature def quality(self, object new_qual): # @DuplicatedSignature
if (type(new_qual) == list) or (new_qual is None) : # TODO check that quality column exists if (new_qual is None) or (type(new_qual) == list) : # TODO check that quality column exists
self[QUALITY_COLUMN_str] = new_qual self._view.get_column(QUALITY_COLUMN).set_line(self._index, new_qual)
else : # Quality is in str form elif (type(new_qual) == str) or (type(new_qual) == bytes) : # Quality is in str form
self._view.get_column(QUALITY_COLUMN_str).set_str_line(self._index, new_qual) self._view.get_column(QUALITY_COLUMN).set_str_line(self._index, tobytes(new_qual))
else :
raise Exception("Sequence quality in unrecognized format")
# quality character string property getter
@property
def quality_str(self):
return self._view.get_column(QUALITY_COLUMN).get_str_line(self._index)
def __str__(self):
return bytes2str(self.nuc_seq)
cpdef object get_str_quality(self) : # TODO not ideal. Make quality_int and quality_str properties
return self._view.get_column(QUALITY_COLUMN_str).get_str_line(self._index)
# cpdef str reverse_complement(self) : TODO in C ? # cpdef str reverse_complement(self) : TODO in C ?
# pass # pass