Cython API: added slices in Seq classes and fixes
This commit is contained in:
@ -8,19 +8,22 @@ cdef class Seq(dict) :
|
||||
cpdef object clone(self)
|
||||
cpdef str get_str(self)
|
||||
cpdef get_symbol_at(self, int pos)
|
||||
|
||||
cpdef get_slice(self, slice slice_to_get)
|
||||
|
||||
|
||||
cdef class Nuc_Seq(Seq) :
|
||||
|
||||
cdef Nuc_Seq _reverse_complement
|
||||
cdef object _quality_array
|
||||
cdef bint _revcomp
|
||||
cpdef set_quality(self, object new_quality, int offset=*)
|
||||
cpdef object build_quality_array(self, list quality)
|
||||
cpdef bytes build_reverse_complement(self)
|
||||
|
||||
|
||||
cdef class Seq_Stored(Line) :
|
||||
pass
|
||||
cpdef get_symbol_at(self, int pos)
|
||||
cpdef get_slice(self, slice slice_to_get)
|
||||
|
||||
|
||||
cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
@ -29,7 +32,6 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
cdef object _quality_array
|
||||
cdef bytes _seq
|
||||
cpdef set(self, object id, object seq, object definition=*, object quality=*, int offset=*, object tags=*)
|
||||
cpdef get_symbol_at(self, int pos)
|
||||
cpdef set_quality_int(self, list new_qual)
|
||||
cpdef set_quality_char(self, object new_qual, int offset=*)
|
||||
cpdef object build_quality_array(self, list quality)
|
||||
|
@ -4,7 +4,9 @@ from obitools3.utils cimport bytes2str, str2bytes, tostr, tobytes
|
||||
|
||||
from obitools3.dms.view.view cimport View
|
||||
|
||||
from .capi.obitypes cimport index_t
|
||||
from obitools3.dms.column.column cimport Column
|
||||
|
||||
from .capi.obitypes cimport index_t, OBI_QUAL
|
||||
|
||||
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
ID_COLUMN, \
|
||||
@ -30,10 +32,9 @@ cdef class Seq(dict) :
|
||||
def __init__(self, object id, object seq, object definition=None, object tags=None) :
|
||||
cdef object k
|
||||
cdef bytes k_b
|
||||
self[ID_COLUMN] = tobytes(id)
|
||||
self[SEQUENCE_COLUMN] = tobytes(seq)
|
||||
if definition is not None :
|
||||
self.definition = tobytes(definition)
|
||||
self.id = id
|
||||
self.seq = seq
|
||||
self.definition = definition
|
||||
if tags is not None :
|
||||
for k in tags:
|
||||
k_b = tobytes(k)
|
||||
@ -56,6 +57,8 @@ cdef class Seq(dict) :
|
||||
def __getitem__(self, object ref):
    """Return a symbol, a sub-sequence or a stored value, depending on `ref`.

    - an integer returns the result of get_symbol_at(ref),
    - a slice returns the result of get_slice(ref),
    - anything else is converted with tobytes() and looked up as a key of the underlying dict.
    """
    # isinstance instead of an exact type() comparison, so that int subclasses are treated as positions too
    if isinstance(ref, int):
        return self.get_symbol_at(ref)
    if isinstance(ref, slice):
        return self.get_slice(ref)
    return super(Seq, self).__getitem__(tobytes(ref))
|
||||
|
||||
@ -77,6 +80,18 @@ cdef class Seq(dict) :
|
||||
seq = seq_class(self.id, self.seq, definition=self.definition, quality=self.quality, tags=self)
|
||||
return seq
|
||||
|
||||
cpdef object get_slice(self, slice slice_to_get):
    """Build a new sequence object restricted to `slice_to_get`.

    The new object is an instance of the same class as self. Its id is the id of self
    with the b"_SUB" suffix, its sequence is self.seq[slice_to_get], and the definition
    and the tags of self are handed over unchanged. When self holds a quality
    (QUALITY_COLUMN in self), the quality is sliced the same way and handed over too.
    """
    cdef object new_seq
    cdef dict init_args
    cdef type seq_class
    seq_class = type(self)
    init_args = {"definition": self.definition, "tags": self}
    # Only pass a quality when there is one: Seq.__init__ has no `quality` parameter,
    # so an unconditional quality=None made slicing a plain Seq raise TypeError.
    # NOTE(review): relies on subclasses (e.g. Nuc_Seq) defaulting `quality` to None - confirm
    if QUALITY_COLUMN in self:
        init_args["quality"] = self.quality[slice_to_get]
    new_seq = seq_class(self.id+b"_SUB", self.seq[slice_to_get], **init_args) # TODO discuss suffix
    return new_seq
|
||||
|
||||
cpdef get_symbol_at(self, int pos):
    """Return the symbol at position `pos` as a slice of length 1 of self.seq.

    Negative positions count from the end of the sequence. Because slicing is used,
    a position outside of the sequence gives an empty result instead of an IndexError
    (assuming self.seq follows the standard Python slice semantics).
    """
    # seq[-1:0] is empty, so the last symbol needs an open-ended slice
    if pos == -1:
        return self.seq[pos:]
    return self.seq[pos:pos+1]
|
||||
|
||||
@ -105,7 +120,9 @@ cdef class Seq(dict) :
|
||||
|
||||
@definition.setter
|
||||
def definition(self, object new_definition): # @DuplicatedSignature
|
||||
self[DEFINITION_COLUMN] = tobytes(new_definition)
|
||||
if new_definition is not None:
|
||||
new_definition = tobytes(new_definition)
|
||||
self[DEFINITION_COLUMN] = new_definition
|
||||
|
||||
|
||||
cdef class Nuc_Seq(Seq) :
|
||||
@ -116,13 +133,14 @@ cdef class Nuc_Seq(Seq) :
|
||||
cdef int q
|
||||
cdef list q_proba_list
|
||||
|
||||
self[ID_COLUMN] = tobytes(id)
|
||||
self[NUC_SEQUENCE_COLUMN] = tobytes(seq)
|
||||
if definition is not None :
|
||||
self.definition = tobytes(definition)
|
||||
self.id = id
|
||||
self.seq = seq
|
||||
self.definition = definition
|
||||
if quality is not None:
|
||||
self.set_quality(quality, offset=offset)
|
||||
|
||||
|
||||
self._revcomp = False
|
||||
|
||||
if tags is not None:
|
||||
for k in tags:
|
||||
k_b = tobytes(k)
|
||||
@ -139,6 +157,15 @@ cdef class Nuc_Seq(Seq) :
|
||||
new_seq = Nuc_Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
|
||||
return new_seq
|
||||
|
||||
# revcomp property getter and setter (boolean indicating whether the sequence was created by reverse complementing another sequence)
@property
def revcomp(self):
    """Return the flag stored in self._revcomp."""
    return self._revcomp

@revcomp.setter
def revcomp(self, bint is_revcomp): # @DuplicatedSignature
    """Store the given boolean in self._revcomp."""
    self._revcomp = is_revcomp
|
||||
|
||||
# nuc sequence property getter and setter
|
||||
@property
|
||||
def seq(self):
|
||||
@ -151,7 +178,10 @@ cdef class Nuc_Seq(Seq) :
|
||||
# sequence quality property getter and setter
|
||||
@property
|
||||
def quality(self):
|
||||
return self[QUALITY_COLUMN]
|
||||
if QUALITY_COLUMN in self:
|
||||
return self[QUALITY_COLUMN]
|
||||
else:
|
||||
return None
|
||||
|
||||
cpdef set_quality(self, object new_quality, int offset=-1):
|
||||
cdef list quality_int
|
||||
@ -169,7 +199,8 @@ cdef class Nuc_Seq(Seq) :
|
||||
@property
|
||||
def quality_array(self):
|
||||
if self._quality_array is None:
|
||||
self._quality_array = self.build_quality_array(self[QUALITY_COLUMN])
|
||||
if QUALITY_COLUMN in self:
|
||||
self._quality_array = self.build_quality_array(self[QUALITY_COLUMN])
|
||||
return self._quality_array
|
||||
|
||||
cpdef object build_quality_array(self, list quality):
|
||||
@ -190,7 +221,8 @@ cdef class Nuc_Seq(Seq) :
|
||||
reversed_quality = self.quality[::-1]
|
||||
else:
|
||||
reversed_quality = None
|
||||
seq = Nuc_Seq(self.id, rev_comp, definition=self.definition, quality=reversed_quality, tags=self)
|
||||
seq = Nuc_Seq(self.id+b"_CMP", rev_comp, definition=self.definition, quality=reversed_quality, tags=self)
|
||||
seq.revcomp = True
|
||||
self._reverse_complement = seq
|
||||
return self._reverse_complement
|
||||
|
||||
@ -202,6 +234,29 @@ cdef class Nuc_Seq(Seq) :
|
||||
|
||||
cdef class Seq_Stored(Line) :
|
||||
|
||||
def __getitem__(self, object ref):
    """Return a symbol, a sub-sequence or a stored value, depending on `ref`.

    - an integer returns the result of get_symbol_at(ref),
    - a slice returns the result of get_slice(ref),
    - anything else is passed unchanged to the __getitem__ of the parent class.
    """
    # isinstance instead of an exact type() comparison, so that int subclasses are treated as positions too
    if isinstance(ref, int):
        return self.get_symbol_at(ref)
    if isinstance(ref, slice):
        return self.get_slice(ref)
    return super(Seq_Stored, self).__getitem__(ref)
|
||||
|
||||
cpdef object get_slice(self, slice slice_to_get):
    """Build a new sequence object restricted to `slice_to_get`.

    The new object is created by calling the class of self with: the id of self plus the
    b"_SUB" suffix, self.seq[slice_to_get], the definition of self, the sliced quality
    (None when QUALITY_COLUMN is not in self) and self as tags.
    """
    cdef type seq_class = type(self)
    cdef list sliced_quality = self.quality[slice_to_get] if QUALITY_COLUMN in self else None
    # NOTE(review): reverse_complement builds a Nuc_Seq explicitly, whereas this calls the stored class itself;
    # confirm that the stored classes can be constructed with these arguments
    return seq_class(self.id+b"_SUB", self.seq[slice_to_get], definition=self.definition, quality=sliced_quality, tags=self) # TODO discuss suffix
|
||||
|
||||
cpdef get_symbol_at(self, int pos):
    """Return the symbol at position `pos` as a slice of length 1 of self.seq.

    Negative positions count from the end of the sequence. Because slicing is used,
    a position outside of the sequence gives an empty result instead of an IndexError
    (assuming self.seq follows the standard Python slice semantics).
    """
    # seq[-1:0] is empty, so the last symbol needs an open-ended slice
    if pos == -1:
        return self.seq[pos:]
    return self.seq[pos:pos+1]
|
||||
|
||||
# sequence id property getter and setter
|
||||
@property
|
||||
def id(self): # @ReservedAssignment @DuplicatedSignature
|
||||
@ -243,7 +298,7 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
|
||||
self[ID_COLUMN] = tobytes(id)
|
||||
self[NUC_SEQUENCE_COLUMN] = tobytes(seq)
|
||||
if definition is not None :
|
||||
if definition is not None:
|
||||
self.definition = tobytes(definition)
|
||||
if quality is not None:
|
||||
if type(quality) == list:
|
||||
@ -257,21 +312,11 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
for k in tags:
|
||||
k_b = tobytes(k)
|
||||
if k_b not in SPECIAL_COLUMNS:
|
||||
# TODO discuss convert value to bytes if str
|
||||
if type(tags[k]) == str:
|
||||
self[k_b] = str2bytes(tags[k])
|
||||
else:
|
||||
self[k_b] = tags[k]
|
||||
|
||||
def __getitem__(self, object ref):
    """Return the symbol at position `ref` when it is an integer, otherwise defer to the parent class.

    Integers go through get_symbol_at(ref); any other reference is passed unchanged
    to the __getitem__ of the parent class.
    """
    # isinstance instead of an exact type() comparison, so that int subclasses are treated as positions too
    if isinstance(ref, int):
        return self.get_symbol_at(ref)
    return super(Nuc_Seq_Stored, self).__getitem__(ref)
|
||||
|
||||
cpdef get_symbol_at(self, int pos):
    """Return the symbol at position `pos` as a slice of length 1 of self.seq.

    Negative positions count from the end of the sequence. Because slicing is used,
    a position outside of the sequence gives an empty result instead of an IndexError
    (assuming self.seq follows the standard Python slice semantics).
    """
    # seq[-1:0] is empty, so the last symbol needs an open-ended slice
    if pos == -1:
        return self.seq[pos:]
    return self.seq[pos:pos+1]
|
||||
|
||||
# seq property getter and setter
|
||||
@property
|
||||
def seq(self):
|
||||
@ -287,15 +332,22 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
self._view.get_column(NUC_SEQUENCE_COLUMN).set_line(self.index, tobytes(new_seq))
|
||||
|
||||
cpdef set_quality_int(self, list new_qual):
    # Store `new_qual` (a list) as the quality of this line.
    # The quality column is created in the view first when this line does not have it yet,
    # then the list is written at self.index through the column's set_line().
    # NOTE(review): presumably a list of integer quality scores, as the method name suggests - confirm
    if QUALITY_COLUMN not in self:
        Column.new_column(self._view, QUALITY_COLUMN, OBI_QUAL)
    self._view.get_column(QUALITY_COLUMN).set_line(self.index, new_qual)
|
||||
|
||||
cpdef set_quality_char(self, object new_qual, int offset=-1):
    # Store `new_qual`, converted with tobytes(), as the quality of this line.
    # The quality column is created in the view first when this line does not have it yet,
    # then the value is written at self.index through the column's set_str_line().
    # NOTE(review): `offset` is forwarded as is; presumably the character encoding offset of the
    # quality string, with -1 meaning "use the default" - confirm against set_str_line()
    if QUALITY_COLUMN not in self:
        Column.new_column(self._view, QUALITY_COLUMN, OBI_QUAL)
    self._view.get_column(QUALITY_COLUMN).set_str_line(self.index, tobytes(new_qual), offset=offset)
|
||||
|
||||
# quality property getter and setter
|
||||
@property
|
||||
def quality(self):
|
||||
return self._view.get_column(QUALITY_COLUMN).get_line(self.index)
|
||||
if QUALITY_COLUMN in self:
|
||||
return self._view.get_column(QUALITY_COLUMN).get_line(self.index)
|
||||
else:
|
||||
return None
|
||||
|
||||
@quality.setter
|
||||
def quality(self, object new_qual): # @DuplicatedSignature
|
||||
@ -311,13 +363,17 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
# WARNING: default offset used
|
||||
@property
|
||||
def quality_str(self):
|
||||
return self._view.get_column(QUALITY_COLUMN).get_str_line(self._index)
|
||||
if QUALITY_COLUMN in self:
|
||||
return self._view.get_column(QUALITY_COLUMN).get_str_line(self._index)
|
||||
else:
|
||||
return None
|
||||
|
||||
# sequence quality array property getter and setter
|
||||
@property
|
||||
def quality_array(self):
|
||||
if self._quality_array is None:
|
||||
self._quality_array = self.build_quality_array(self._view.get_column(QUALITY_COLUMN).get_line(self.index))
|
||||
if QUALITY_COLUMN in self:
|
||||
self._quality_array = self.build_quality_array(self._view.get_column(QUALITY_COLUMN).get_line(self.index))
|
||||
return self._quality_array
|
||||
|
||||
cpdef object build_quality_array(self, list quality):
|
||||
@ -331,14 +387,15 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
@property
|
||||
def reverse_complement(self):
|
||||
cdef bytes rev_comp
|
||||
cdef object seq_class
|
||||
cdef list reversed_quality
|
||||
if self._reverse_complement is None:
|
||||
rev_comp = self.build_reverse_complement()
|
||||
if QUALITY_COLUMN in self:
|
||||
reversed_quality = self.quality[::-1]
|
||||
else:
|
||||
reversed_quality = None
|
||||
seq = Nuc_Seq(self.id, rev_comp, definition=self.definition, quality=reversed_quality, tags=self)
|
||||
seq = Nuc_Seq(self.id+b"_CMP", rev_comp, definition=self.definition, quality=reversed_quality, tags=self)
|
||||
seq.revcomp = True
|
||||
self._reverse_complement = seq
|
||||
return self._reverse_complement
|
||||
|
||||
@ -356,7 +413,3 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
def __len__(self):
    """Return the length of the value stored for this line in the NUC_SEQUENCE_COLUMN column."""
    stored_seq = self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index)
    return len(stored_seq)
|
||||
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user