diff --git a/python/obitools3/dms/obiseq.pxd b/python/obitools3/dms/obiseq.pxd index b397d0c..62fdbcb 100644 --- a/python/obitools3/dms/obiseq.pxd +++ b/python/obitools3/dms/obiseq.pxd @@ -4,19 +4,21 @@ from .view.view cimport Line cdef class Seq(dict) : + cdef dict _annotations - cdef str _id - cdef object _seq - cdef str _definition - cdef class Nuc_Seq(Seq) : cdef object _quality #cpdef object reverse_complement(self) - -cdef class Nuc_Seq_Stored(Line) : - cpdef object get_str_quality(self) +cdef class Seq_Stored(Line) : + pass + + +cdef class Nuc_Seq_Stored(Seq_Stored) : + + cpdef set_quality_int(self, list new_qual) + cpdef set_quality_char(self, bytes new_qual, int offset=*) #cpdef object reverse_complement(self) \ No newline at end of file diff --git a/python/obitools3/dms/obiseq.pyx b/python/obitools3/dms/obiseq.pyx index b92073c..20ac4d2 100644 --- a/python/obitools3/dms/obiseq.pyx +++ b/python/obitools3/dms/obiseq.pyx @@ -1,55 +1,68 @@ #cython: language_level=3 -from obitools3.utils cimport bytes2str, str2bytes +from obitools3.utils cimport bytes2str, str2bytes, tostr, tobytes from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \ ID_COLUMN, \ DEFINITION_COLUMN, \ - QUALITY_COLUMN - - -NUC_SEQUENCE_COLUMN_str = bytes2str(NUC_SEQUENCE_COLUMN) -ID_COLUMN_str = bytes2str(ID_COLUMN) -DEFINITION_COLUMN_str = bytes2str(DEFINITION_COLUMN) -QUALITY_COLUMN_str = bytes2str(QUALITY_COLUMN) + QUALITY_COLUMN, \ + COUNT_COLUMN cdef class Seq(dict) : - def __init__(self, str id, object seq, object definition=None) : - self.id = id - self.seq = seq + def __init__(self, object id, object seq, object definition=None, object quality=None, int offset=-1, object tags=None) : + cdef object k + cdef bytes k_b + self[ID_COLUMN] = tobytes(id) + self.seq = tobytes(seq) if definition is not None : - self.definition = definition + self.definition = tobytes(definition) + self._annotations = {} + if tags is not None : + for k in tags: + k_b = tobytes(k) + self[k_b] = tags[k_b] + + + def __contains__(self, object key): + return dict.__contains__(self, tobytes(key)) + # sequence id property getter and setter @property def id(self): # @ReservedAssignment - return self._id + return self[ID_COLUMN] @id.setter - def id(self, str new_id): # @ReservedAssignment @DuplicatedSignature - self._id = new_id - self[ID_COLUMN] = new_id + def id(self, object new_id): # @ReservedAssignment @DuplicatedSignature + self[ID_COLUMN] = tobytes(new_id) + + # sequence annotations property getter and setter + @property + def annotations(self): # @ReservedAssignment + return self._annotations + + @annotations.setter + def annotations(self, object annotations): # @ReservedAssignment @DuplicatedSignature + self._annotations = annotations # sequence property getter and setter @property def seq(self): - return self._seq + return self[b"SEQ"] @seq.setter def seq(self, object new_seq): # @DuplicatedSignature - self._seq = new_seq - self["SEQ"] = new_seq # TODO discuss + self[b"SEQ"] = tobytes(new_seq) # sequence definition property getter and setter @property def definition(self): - return self._definition + return self[DEFINITION_COLUMN] @definition.setter def definition(self, object new_definition): # @DuplicatedSignature - self._definition = new_definition - self[DEFINITION_COLUMN_str] = new_definition + self[DEFINITION_COLUMN] = tobytes(new_definition) cdef class Nuc_Seq(Seq) : @@ -62,65 +75,82 @@ cdef class Nuc_Seq(Seq) : @seq.setter def seq(self, object new_seq): # @DuplicatedSignature self._seq = new_seq - self[NUC_SEQUENCE_COLUMN_str] = new_seq + self[NUC_SEQUENCE_COLUMN] = tobytes(new_seq) # sequence quality property getter and setter @property def quality(self): - return self._quality + return self[QUALITY_COLUMN] @quality.setter def quality(self, object new_quality): # @DuplicatedSignature - self._quality = new_quality - self[QUALITY_COLUMN_str] = new_quality + self[QUALITY_COLUMN] = tobytes(new_quality) # cpdef str reverse_complement(self) : TODO in C ? # pass -cdef class Nuc_Seq_Stored(Line) : +cdef class Seq_Stored(Line) : # sequence id property getter and setter @property def id(self): # @ReservedAssignment @DuplicatedSignature - return self[ID_COLUMN_str] + return self._view.get_column(ID_COLUMN).get_line(self.index) @id.setter - def id(self, str new_id): # @ReservedAssignment @DuplicatedSignature - self[ID_COLUMN_str] = new_id + def id(self, object new_id): # @ReservedAssignment @DuplicatedSignature + self._view.get_column(ID_COLUMN).set_line(self._index, tobytes(new_id)) # sequence definition property getter and setter @property def definition(self): - return self[DEFINITION_COLUMN_str] + return self._view.get_column(DEFINITION_COLUMN).get_line(self.index) @definition.setter - def definition(self, str new_def): # @DuplicatedSignature - self[DEFINITION_COLUMN_str] = new_def - + def definition(self, object new_def): # @DuplicatedSignature + self._view.get_column(DEFINITION_COLUMN).set_line(self._index, tobytes(new_def)) + + +cdef class Nuc_Seq_Stored(Seq_Stored) : + + cpdef set_quality_int(self, list new_qual): + self._view.get_column(QUALITY_COLUMN).set_line(self.index, new_qual) + + cpdef set_quality_char(self, bytes new_qual, int offset=-1): + self._view.get_column(QUALITY_COLUMN).set_str_line(self.index, new_qual, offset=offset) + # nuc_seq property getter and setter @property def nuc_seq(self): - return self[NUC_SEQUENCE_COLUMN_str] + return self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index) @nuc_seq.setter def nuc_seq(self, object new_seq): # @DuplicatedSignature - self[NUC_SEQUENCE_COLUMN_str] = new_seq + self._view.get_column(NUC_SEQUENCE_COLUMN).set_line(self.index, tobytes(new_seq)) # quality property getter and setter @property def quality(self): - return self[QUALITY_COLUMN_str] + return self._view.get_column(QUALITY_COLUMN).get_line(self.index) @quality.setter def quality(self, object new_qual): # @DuplicatedSignature - if (type(new_qual) == list) or (new_qual is None) : # TODO check that quality column exists - self[QUALITY_COLUMN_str] = new_qual - else : # Quality is in str form - self._view.get_column(QUALITY_COLUMN_str).set_str_line(self._index, new_qual) + if (new_qual is None) or (type(new_qual) == list) : # TODO check that quality column exists + self._view.get_column(QUALITY_COLUMN).set_line(self._index, new_qual) + elif (type(new_qual) == str) or (type(new_qual) == bytes) : # Quality is in str form + self._view.get_column(QUALITY_COLUMN).set_str_line(self._index, tobytes(new_qual)) + else : + raise Exception("Sequence quality in unrecognized format") - cpdef object get_str_quality(self) : # TODO not ideal. Make quality_int and quality_str properties - return self._view.get_column(QUALITY_COLUMN_str).get_str_line(self._index) + # quality character string property getter + @property + def quality_str(self): + return self._view.get_column(QUALITY_COLUMN).get_str_line(self._index) + + + def __str__(self): + return bytes2str(self.nuc_seq) + # cpdef str reverse_complement(self) : TODO in C ? # pass