Cython API: added slices in Seq classes and fixes
This commit is contained in:
@ -8,19 +8,22 @@ cdef class Seq(dict) :
|
|||||||
cpdef object clone(self)
|
cpdef object clone(self)
|
||||||
cpdef str get_str(self)
|
cpdef str get_str(self)
|
||||||
cpdef get_symbol_at(self, int pos)
|
cpdef get_symbol_at(self, int pos)
|
||||||
|
cpdef get_slice(self, slice slice_to_get)
|
||||||
|
|
||||||
|
|
||||||
cdef class Nuc_Seq(Seq) :
|
cdef class Nuc_Seq(Seq) :
|
||||||
|
|
||||||
cdef Nuc_Seq _reverse_complement
|
cdef Nuc_Seq _reverse_complement
|
||||||
cdef object _quality_array
|
cdef object _quality_array
|
||||||
|
cdef bint _revcomp
|
||||||
cpdef set_quality(self, object new_quality, int offset=*)
|
cpdef set_quality(self, object new_quality, int offset=*)
|
||||||
cpdef object build_quality_array(self, list quality)
|
cpdef object build_quality_array(self, list quality)
|
||||||
cpdef bytes build_reverse_complement(self)
|
cpdef bytes build_reverse_complement(self)
|
||||||
|
|
||||||
|
|
||||||
cdef class Seq_Stored(Line) :
|
cdef class Seq_Stored(Line) :
|
||||||
pass
|
cpdef get_symbol_at(self, int pos)
|
||||||
|
cpdef get_slice(self, slice slice_to_get)
|
||||||
|
|
||||||
|
|
||||||
cdef class Nuc_Seq_Stored(Seq_Stored) :
|
cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||||
@ -29,7 +32,6 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
cdef object _quality_array
|
cdef object _quality_array
|
||||||
cdef bytes _seq
|
cdef bytes _seq
|
||||||
cpdef set(self, object id, object seq, object definition=*, object quality=*, int offset=*, object tags=*)
|
cpdef set(self, object id, object seq, object definition=*, object quality=*, int offset=*, object tags=*)
|
||||||
cpdef get_symbol_at(self, int pos)
|
|
||||||
cpdef set_quality_int(self, list new_qual)
|
cpdef set_quality_int(self, list new_qual)
|
||||||
cpdef set_quality_char(self, object new_qual, int offset=*)
|
cpdef set_quality_char(self, object new_qual, int offset=*)
|
||||||
cpdef object build_quality_array(self, list quality)
|
cpdef object build_quality_array(self, list quality)
|
||||||
|
@ -4,7 +4,9 @@ from obitools3.utils cimport bytes2str, str2bytes, tostr, tobytes
|
|||||||
|
|
||||||
from obitools3.dms.view.view cimport View
|
from obitools3.dms.view.view cimport View
|
||||||
|
|
||||||
from .capi.obitypes cimport index_t
|
from obitools3.dms.column.column cimport Column
|
||||||
|
|
||||||
|
from .capi.obitypes cimport index_t, OBI_QUAL
|
||||||
|
|
||||||
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||||
ID_COLUMN, \
|
ID_COLUMN, \
|
||||||
@ -30,10 +32,9 @@ cdef class Seq(dict) :
|
|||||||
def __init__(self, object id, object seq, object definition=None, object tags=None) :
|
def __init__(self, object id, object seq, object definition=None, object tags=None) :
|
||||||
cdef object k
|
cdef object k
|
||||||
cdef bytes k_b
|
cdef bytes k_b
|
||||||
self[ID_COLUMN] = tobytes(id)
|
self.id = id
|
||||||
self[SEQUENCE_COLUMN] = tobytes(seq)
|
self.seq = seq
|
||||||
if definition is not None :
|
self.definition = definition
|
||||||
self.definition = tobytes(definition)
|
|
||||||
if tags is not None :
|
if tags is not None :
|
||||||
for k in tags:
|
for k in tags:
|
||||||
k_b = tobytes(k)
|
k_b = tobytes(k)
|
||||||
@ -56,6 +57,8 @@ cdef class Seq(dict) :
|
|||||||
def __getitem__(self, object ref):
    """Return a symbol, a sub-sequence or a stored value depending on `ref`.

    An integer is forwarded to `get_symbol_at` and a slice to `get_slice`.
    Any other reference is converted with `tobytes` and looked up through
    the parent class `__getitem__`.
    """
    # isinstance rather than an exact type comparison, so that int
    # subclasses are also treated as positions instead of being sent to
    # the key lookup.
    if isinstance(ref, int):
        return self.get_symbol_at(ref)
    if isinstance(ref, slice):
        return self.get_slice(ref)
    return super(Seq, self).__getitem__(tobytes(ref))
|
||||||
|
|
||||||
@ -77,6 +80,18 @@ cdef class Seq(dict) :
|
|||||||
seq = seq_class(self.id, self.seq, definition=self.definition, quality=self.quality, tags=self)
|
seq = seq_class(self.id, self.seq, definition=self.definition, quality=self.quality, tags=self)
|
||||||
return seq
|
return seq
|
||||||
|
|
||||||
|
cpdef object get_slice(self, slice slice_to_get):
    """Build a new sequence object restricted to `slice_to_get`.

    The result is created with the same class as `self`. Its id is the
    current id suffixed with b"_SUB", its sequence is `self.seq` sliced,
    and its quality is the sliced quality when QUALITY_COLUMN is present
    in `self` (None otherwise). The definition is passed through and
    `self` is passed as the tags.
    """
    cdef type cls
    cdef list quality_part
    cls = type(self)
    quality_part = self.quality[slice_to_get] if QUALITY_COLUMN in self else None
    # NOTE(review): the Seq.__init__ signature visible in this change has no
    # `quality` keyword; confirm this is only reached through subclasses
    # whose constructor accepts it.
    return cls(self.id+b"_SUB",    # TODO discuss suffix
               self.seq[slice_to_get],
               definition=self.definition,
               quality=quality_part,
               tags=self)
|
||||||
|
|
||||||
cpdef get_symbol_at(self, int pos):
    """Return the one-symbol slice of `self.seq` at position `pos`.

    The result is a slice (not an indexed element), so a position outside
    the sequence yields an empty slice instead of raising.
    """
    # For pos == -1 the slice [pos:pos+1] is [-1:0], which is always empty;
    # an open-ended slice returns the last symbol as expected.
    if pos == -1:
        return self.seq[pos:]
    return self.seq[pos:pos+1]
|
||||||
|
|
||||||
@ -105,7 +120,9 @@ cdef class Seq(dict) :
|
|||||||
|
|
||||||
@definition.setter
|
@definition.setter
|
||||||
def definition(self, object new_definition): # @DuplicatedSignature
|
def definition(self, object new_definition): # @DuplicatedSignature
|
||||||
self[DEFINITION_COLUMN] = tobytes(new_definition)
|
if new_definition is not None:
|
||||||
|
new_definition = tobytes(new_definition)
|
||||||
|
self[DEFINITION_COLUMN] = new_definition
|
||||||
|
|
||||||
|
|
||||||
cdef class Nuc_Seq(Seq) :
|
cdef class Nuc_Seq(Seq) :
|
||||||
@ -116,13 +133,14 @@ cdef class Nuc_Seq(Seq) :
|
|||||||
cdef int q
|
cdef int q
|
||||||
cdef list q_proba_list
|
cdef list q_proba_list
|
||||||
|
|
||||||
self[ID_COLUMN] = tobytes(id)
|
self.id = id
|
||||||
self[NUC_SEQUENCE_COLUMN] = tobytes(seq)
|
self.seq = seq
|
||||||
if definition is not None :
|
self.definition = definition
|
||||||
self.definition = tobytes(definition)
|
|
||||||
if quality is not None:
|
if quality is not None:
|
||||||
self.set_quality(quality, offset=offset)
|
self.set_quality(quality, offset=offset)
|
||||||
|
|
||||||
|
self._revcomp = False
|
||||||
|
|
||||||
if tags is not None:
|
if tags is not None:
|
||||||
for k in tags:
|
for k in tags:
|
||||||
k_b = tobytes(k)
|
k_b = tobytes(k)
|
||||||
@ -139,6 +157,15 @@ cdef class Nuc_Seq(Seq) :
|
|||||||
new_seq = Nuc_Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
|
new_seq = Nuc_Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
|
||||||
return new_seq
|
return new_seq
|
||||||
|
|
||||||
|
# revcomp property getter and setter (boolean indicating whether the sequence was created by reverse complementing another sequence)
@property
def revcomp(self):
    """Return the `_revcomp` flag of this sequence."""
    return self._revcomp

@revcomp.setter
def revcomp(self, bint revcomp):    # @DuplicatedSignature
    """Store `revcomp` in the `_revcomp` flag of this sequence."""
    self._revcomp = revcomp
|
||||||
|
|
||||||
# nuc sequence property getter and setter
|
# nuc sequence property getter and setter
|
||||||
@property
|
@property
|
||||||
def seq(self):
|
def seq(self):
|
||||||
@ -151,7 +178,10 @@ cdef class Nuc_Seq(Seq) :
|
|||||||
# sequence quality property getter and setter
@property
def quality(self):
    """Return the value stored under QUALITY_COLUMN, or None when absent."""
    if QUALITY_COLUMN not in self:
        return None
    return self[QUALITY_COLUMN]
|
||||||
|
|
||||||
cpdef set_quality(self, object new_quality, int offset=-1):
|
cpdef set_quality(self, object new_quality, int offset=-1):
|
||||||
cdef list quality_int
|
cdef list quality_int
|
||||||
@ -169,7 +199,8 @@ cdef class Nuc_Seq(Seq) :
|
|||||||
@property
def quality_array(self):
    """Return the cached quality array, building it on first access.

    The array is built with `build_quality_array` from the value stored
    under QUALITY_COLUMN. When that column is absent nothing is built and
    the cached value is returned unchanged.
    """
    if self._quality_array is None and QUALITY_COLUMN in self:
        self._quality_array = self.build_quality_array(self[QUALITY_COLUMN])
    return self._quality_array
|
||||||
|
|
||||||
cpdef object build_quality_array(self, list quality):
|
cpdef object build_quality_array(self, list quality):
|
||||||
@ -190,7 +221,8 @@ cdef class Nuc_Seq(Seq) :
|
|||||||
reversed_quality = self.quality[::-1]
|
reversed_quality = self.quality[::-1]
|
||||||
else:
|
else:
|
||||||
reversed_quality = None
|
reversed_quality = None
|
||||||
seq = Nuc_Seq(self.id, rev_comp, definition=self.definition, quality=reversed_quality, tags=self)
|
seq = Nuc_Seq(self.id+b"_CMP", rev_comp, definition=self.definition, quality=reversed_quality, tags=self)
|
||||||
|
seq.revcomp = True
|
||||||
self._reverse_complement = seq
|
self._reverse_complement = seq
|
||||||
return self._reverse_complement
|
return self._reverse_complement
|
||||||
|
|
||||||
@ -202,6 +234,29 @@ cdef class Nuc_Seq(Seq) :
|
|||||||
|
|
||||||
cdef class Seq_Stored(Line) :
|
cdef class Seq_Stored(Line) :
|
||||||
|
|
||||||
|
def __getitem__(self, object ref):
    """Return a symbol, a sub-sequence or a stored value depending on `ref`.

    An integer is forwarded to `get_symbol_at` and a slice to `get_slice`.
    Any other reference is passed unchanged to the parent class
    `__getitem__`.
    """
    # isinstance rather than an exact type comparison, so that int
    # subclasses are also treated as positions instead of being sent to
    # the parent lookup.
    if isinstance(ref, int):
        return self.get_symbol_at(ref)
    if isinstance(ref, slice):
        return self.get_slice(ref)
    return super(Seq_Stored, self).__getitem__(ref)
|
||||||
|
|
||||||
|
cpdef object get_slice(self, slice slice_to_get):
    """Build a new sequence object restricted to `slice_to_get`.

    The result is created with the same class as `self`. Its id is the
    current id suffixed with b"_SUB", its sequence is `self.seq` sliced,
    and its quality is the sliced quality when QUALITY_COLUMN is present
    in `self` (None otherwise). The definition is passed through and
    `self` is passed as the tags.
    """
    cdef type cls
    cdef list quality_part
    cls = type(self)
    quality_part = self.quality[slice_to_get] if QUALITY_COLUMN in self else None
    # NOTE(review): presumably the concrete stored class can be constructed
    # with (id, seq, definition=, quality=, tags=); confirm, since the stored
    # reverse_complement property builds a Nuc_Seq instead of type(self).
    return cls(self.id+b"_SUB",    # TODO discuss suffix
               self.seq[slice_to_get],
               definition=self.definition,
               quality=quality_part,
               tags=self)
|
||||||
|
|
||||||
|
cpdef get_symbol_at(self, int pos):
    """Return the one-symbol slice of `self.seq` at position `pos`.

    The result is a slice (not an indexed element), so a position outside
    the sequence yields an empty slice instead of raising.
    """
    # For pos == -1 the slice [pos:pos+1] is [-1:0], which is always empty;
    # an open-ended slice returns the last symbol as expected.
    if pos == -1:
        return self.seq[pos:]
    return self.seq[pos:pos+1]
|
||||||
|
|
||||||
# sequence id property getter and setter
|
# sequence id property getter and setter
|
||||||
@property
|
@property
|
||||||
def id(self): # @ReservedAssignment @DuplicatedSignature
|
def id(self): # @ReservedAssignment @DuplicatedSignature
|
||||||
@ -243,7 +298,7 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
|
|
||||||
self[ID_COLUMN] = tobytes(id)
|
self[ID_COLUMN] = tobytes(id)
|
||||||
self[NUC_SEQUENCE_COLUMN] = tobytes(seq)
|
self[NUC_SEQUENCE_COLUMN] = tobytes(seq)
|
||||||
if definition is not None :
|
if definition is not None:
|
||||||
self.definition = tobytes(definition)
|
self.definition = tobytes(definition)
|
||||||
if quality is not None:
|
if quality is not None:
|
||||||
if type(quality) == list:
|
if type(quality) == list:
|
||||||
@ -257,21 +312,11 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
for k in tags:
|
for k in tags:
|
||||||
k_b = tobytes(k)
|
k_b = tobytes(k)
|
||||||
if k_b not in SPECIAL_COLUMNS:
|
if k_b not in SPECIAL_COLUMNS:
|
||||||
# TODO discuss convert value to bytes if str
|
|
||||||
if type(tags[k]) == str:
|
if type(tags[k]) == str:
|
||||||
self[k_b] = str2bytes(tags[k])
|
self[k_b] = str2bytes(tags[k])
|
||||||
else:
|
else:
|
||||||
self[k_b] = tags[k]
|
self[k_b] = tags[k]
|
||||||
|
|
||||||
def __getitem__(self, object ref):
|
|
||||||
if type(ref) == int:
|
|
||||||
return self.get_symbol_at(ref)
|
|
||||||
else:
|
|
||||||
return super(Nuc_Seq_Stored, self).__getitem__(ref)
|
|
||||||
|
|
||||||
cpdef get_symbol_at(self, int pos):
|
|
||||||
return self.seq[pos:pos+1]
|
|
||||||
|
|
||||||
# seq property getter and setter
|
# seq property getter and setter
|
||||||
@property
|
@property
|
||||||
def seq(self):
|
def seq(self):
|
||||||
@ -287,15 +332,22 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
self._view.get_column(NUC_SEQUENCE_COLUMN).set_line(self.index, tobytes(new_seq))
|
self._view.get_column(NUC_SEQUENCE_COLUMN).set_line(self.index, tobytes(new_seq))
|
||||||
|
|
||||||
cpdef set_quality_int(self, list new_qual):
    """Write the list `new_qual` to this line of the quality column.

    When QUALITY_COLUMN is not yet in `self`, the column is first created
    in the view with `Column.new_column` and the OBI_QUAL type.
    """
    if QUALITY_COLUMN not in self:
        Column.new_column(self._view, QUALITY_COLUMN, OBI_QUAL)
    quality_column = self._view.get_column(QUALITY_COLUMN)
    quality_column.set_line(self.index, new_qual)
|
||||||
|
|
||||||
cpdef set_quality_char(self, object new_qual, int offset=-1):
    """Write the character-encoded quality `new_qual` to this line.

    `new_qual` is converted with `tobytes` and handed, together with
    `offset`, to the quality column's `set_str_line`. When QUALITY_COLUMN
    is not yet in `self`, the column is first created in the view with
    `Column.new_column` and the OBI_QUAL type.
    """
    if QUALITY_COLUMN not in self:
        Column.new_column(self._view, QUALITY_COLUMN, OBI_QUAL)
    quality_column = self._view.get_column(QUALITY_COLUMN)
    quality_column.set_str_line(self.index, tobytes(new_qual), offset=offset)
|
||||||
|
|
||||||
# quality property getter and setter
@property
def quality(self):
    """Return this line of the quality column, or None when QUALITY_COLUMN is absent."""
    if QUALITY_COLUMN not in self:
        return None
    return self._view.get_column(QUALITY_COLUMN).get_line(self.index)
|
||||||
|
|
||||||
@quality.setter
|
@quality.setter
|
||||||
def quality(self, object new_qual): # @DuplicatedSignature
|
def quality(self, object new_qual): # @DuplicatedSignature
|
||||||
@ -311,13 +363,17 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
# WARNING: default offset used
@property
def quality_str(self):
    """Return this line of the quality column as given by `get_str_line`, or None when QUALITY_COLUMN is absent."""
    if QUALITY_COLUMN not in self:
        return None
    # NOTE(review): reads self._index while the sibling accessors use
    # self.index; confirm both refer to the same line index.
    return self._view.get_column(QUALITY_COLUMN).get_str_line(self._index)
|
||||||
|
|
||||||
# sequence quality array property getter and setter
@property
def quality_array(self):
    """Return the cached quality array, building it on first access.

    The array is built with `build_quality_array` from this line of the
    quality column. When QUALITY_COLUMN is absent nothing is built and the
    cached value is returned unchanged.
    """
    if self._quality_array is None and QUALITY_COLUMN in self:
        self._quality_array = self.build_quality_array(self._view.get_column(QUALITY_COLUMN).get_line(self.index))
    return self._quality_array
|
||||||
|
|
||||||
cpdef object build_quality_array(self, list quality):
|
cpdef object build_quality_array(self, list quality):
|
||||||
@ -331,14 +387,15 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
@property
def reverse_complement(self):
    """Return the reverse complement of this sequence as a cached Nuc_Seq.

    On first access the complemented sequence comes from
    `build_reverse_complement`, and the quality (when QUALITY_COLUMN is
    present) is reversed. The new Nuc_Seq gets the current id suffixed with
    b"_CMP", has its `revcomp` flag set to True and is stored in
    `_reverse_complement` for later accesses.
    """
    cdef bytes rev_comp
    cdef list reversed_quality

    # Already built on a previous access.
    if self._reverse_complement is not None:
        return self._reverse_complement

    rev_comp = self.build_reverse_complement()
    reversed_quality = self.quality[::-1] if QUALITY_COLUMN in self else None
    complemented = Nuc_Seq(self.id+b"_CMP",
                           rev_comp,
                           definition=self.definition,
                           quality=reversed_quality,
                           tags=self)
    complemented.revcomp = True
    self._reverse_complement = complemented
    return self._reverse_complement
|
||||||
|
|
||||||
@ -356,7 +413,3 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
def __len__(self):
    """Return the length of this line's value in the NUC_SEQUENCE_COLUMN column."""
    nuc_column = self._view.get_column(NUC_SEQUENCE_COLUMN)
    return len(nuc_column.get_line(self.index))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user