Separate the obicolumn classes into new files

This commit is contained in:
2016-12-20 08:14:24 +01:00
parent 04d39c62ab
commit f43dc3e3ab
7 changed files with 396 additions and 344 deletions

View File

@ -7,42 +7,6 @@ from .capi.obitypes cimport obiversion_t, OBIType_t, index_t
from ._obitaxo cimport OBI_Taxonomy
# Declaration of the base column extension type: the alias of the column,
# the pointer used to reach the C column, and the view the column is taken from.
cdef class OBIDMS_column:
cdef str _alias # associated property: alias
# Pointer on the C column pointer; it is dereferenced with [0] to reach the column
cdef OBIDMS_column_p* _pointer
# View given to the constructor
cdef OBIView _view
# Closes the C column; raises an Exception if closing fails
cpdef close(self)
# Returns the column class matching the data type and the number of elements per line of column_p
@staticmethod
cdef object get_subclass_type(OBIDMS_column_p column_p)
# Column whose lines are accessed element by element
# (presumably used when a line holds several elements -- TODO confirm).
cdef class OBIDMS_column_multi_elts(OBIDMS_column):
# Sets the elements of line line_nb from a dict mapping element names to values
cpdef set_line(self, index_t line_nb, dict values)
# One line of a column: the column and the index of the line in that column.
cdef class OBIDMS_column_line:
# Column the line is read from and written to
cdef OBIDMS_column _column
# Index of the line in the column
cdef index_t _index
# One line of a view: the view and the index of the line in that view.
cdef class OBIView_line :
# Index of the line in the columns of the view
cdef index_t _index
# View the line is read from and written to
cdef OBIView _view
# List of line indexes selected in a view.
cdef class OBIView_line_selection(list):
# View the line indexes refer to
cdef OBIView _view
# Name of that view, read from view.name when the selection is created
cdef str _view_name
cdef class OBIDMS:

View File

@ -30,243 +30,13 @@ from ._obidms cimport OBIDMS, \
from ._obitaxo cimport OBI_Taxonomy
from ._obidmscolumn_int cimport OBIDMS_column_int, \
OBIDMS_column_multi_elts_int
from ._obidmscolumn_float cimport OBIDMS_column_float, \
OBIDMS_column_multi_elts_float
from ._obidmscolumn_bool cimport OBIDMS_column_bool, \
OBIDMS_column_multi_elts_bool
from ._obidmscolumn_char cimport OBIDMS_column_char, \
OBIDMS_column_multi_elts_char
from ._obidmscolumn_qual cimport OBIDMS_column_qual, \
OBIDMS_column_multi_elts_qual
from ._obidmscolumn_str cimport OBIDMS_column_str, \
OBIDMS_column_multi_elts_str
from ._obidmscolumn_seq cimport OBIDMS_column_seq, \
OBIDMS_column_multi_elts_seq
from .capi.obiview cimport Obiview_p, \
Obiview_infos_p, \
Alias_column_pair_p, \
obi_new_view_nuc_seqs, \
obi_new_view, \
obi_new_view_cloned_from_name, \
obi_new_view_nuc_seqs_cloned_from_name, \
obi_view_map_file, \
obi_view_unmap_file, \
obi_open_view, \
obi_view_delete_column, \
obi_view_add_column, \
obi_view_create_column_alias, \
obi_view_get_column, \
obi_view_get_pointer_on_column_in_view, \
obi_save_and_close_view, \
VIEW_TYPE_NUC_SEQS, \
NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
QUALITY_COLUMN
from libc.stdlib cimport malloc
cdef class OBIDMS_column :
    """
    Base class wrapping a column of a view.

    An instance keeps the alias of the column, the view it was obtained from and
    the pointer returned by obi_view_get_pointer_on_column_in_view(). That pointer
    is dereferenced on every access to reach the C column and its header.

    Note: should only be initialized through a subclass. get_line() and set_line()
    are called by this class but are not defined in it.
    """

    def __init__(self, OBIView view, str column_alias):
        """
        Bind the instance to the column known as `column_alias` in `view`.

        Raises an Exception if no pointer on the column could be obtained.
        """
        cdef OBIDMS_column_p* column_pp

        column_pp = obi_view_get_pointer_on_column_in_view(view._pointer, str2bytes(column_alias))
        # NOTE(review): assumes the C function returns NULL on failure -- confirm.
        # Without this check, a NULL pointer would be dereferenced by every accessor below.
        if column_pp == NULL :
            raise Exception("Problem getting a pointer on a column in a view")

        # Fill structure
        self._alias = column_alias
        self._pointer = column_pp
        self._view = view

    def __setitem__(self, index_t line_nb, object value):
        """Set the line `line_nb` to `value` through set_line()."""
        self.set_line(line_nb, value)

    def __getitem__(self, index_t line_nb):
        """Return the line `line_nb` through get_line()."""
        return self.get_line(line_nb)

    def __len__(self):
        """Return the number of lines used in the column."""
        return self.lines_used

    def __sizeof__(self):
        """Return the sum of the header size and the data size stored in the column header."""
        return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size)

    def __iter__(self):
        """Yield each used line of the column, in index order."""
        # Declarations
        cdef index_t line_nb

        # Yield each line
        for line_nb in range(self.lines_used):
            yield self.get_line(line_nb)

    def __str__(self) :
        """Return every line of the column converted to str, one per text line."""
        # join() builds the result in one pass instead of reallocating the string for each line
        return "".join([str(line) + "\n" for line in self])

    def __repr__(self) :
        """Return the alias, original name, version and data type of the column."""
        return "{}, original name: {}, version {}, data type: {}".format(self._alias,
                                                                         self.original_name,
                                                                         self.version,
                                                                         self.data_type)

    cpdef close(self):
        """Close the C column. Raises an Exception if obi_close_column() reports an error."""
        if obi_close_column((self._pointer)[0]) < 0 :
            raise Exception("Problem closing a column")

    # Column alias property getter and setter
    @property
    def alias(self):
        """Alias of the column; setting it delegates to the view's change_column_alias()."""
        return self._alias

    @alias.setter
    def alias(self, new_alias):  # @DuplicatedSignature
        self._view.change_column_alias(self._alias, new_alias)

    # elements_names property getter
    @property
    def elements_names(self):
        """List of element names, obtained by splitting the header string on ';'."""
        return (bytes2str(((self._pointer)[0].header).elements_names)).split(';')

    # nb_elements_per_line property getter
    @property
    def nb_elements_per_line(self):
        """Number of elements per line stored in the column header."""
        return ((self._pointer)[0].header).nb_elements_per_line

    # data_type property getter
    @property
    def data_type(self):
        """Name given by name_data_type() to the returned data type of the column."""
        return bytes2str(name_data_type(((self._pointer)[0].header).returned_data_type))

    # original_name property getter
    @property
    def original_name(self):
        """Name stored in the column header."""
        return bytes2str(((self._pointer)[0].header).name)

    # version property getter
    @property
    def version(self):
        """Version stored in the column header."""
        return ((self._pointer)[0].header).version

    # lines_used property getter
    @property
    def lines_used(self):
        """Number of lines used stored in the column header."""
        return (self._pointer)[0].header.lines_used

    # comments property getter
    @property
    def comments(self):
        """Comments stored in the column header."""
        return bytes2str((self._pointer)[0].header.comments)

    # creation_date property getter
    @property
    def creation_date(self):
        """Creation date of the column header, formatted by obi_format_date()."""
        return bytes2str(obi_format_date((self._pointer)[0].header.creation_date))

    @staticmethod
    cdef object get_subclass_type(OBIDMS_column_p column_p) :
        """
        Return the column class to use for the C column `column_p`.

        The class is chosen from the returned data type of the column and from
        whether its lines hold exactly one element or not.
        Raises an Exception if the data type is not one of the handled types.
        """
        cdef object single_elt_class
        cdef object multi_elts_class
        cdef OBIDMS_column_header_p header
        cdef OBIType_t col_type

        header = column_p.header
        col_type = header.returned_data_type

        # Pick the pair of candidate classes for the data type
        if col_type == OBI_INT :
            single_elt_class = OBIDMS_column_int
            multi_elts_class = OBIDMS_column_multi_elts_int
        elif col_type == OBI_FLOAT :
            single_elt_class = OBIDMS_column_float
            multi_elts_class = OBIDMS_column_multi_elts_float
        elif col_type == OBI_BOOL :
            single_elt_class = OBIDMS_column_bool
            multi_elts_class = OBIDMS_column_multi_elts_bool
        elif col_type == OBI_CHAR :
            single_elt_class = OBIDMS_column_char
            multi_elts_class = OBIDMS_column_multi_elts_char
        elif col_type == OBI_QUAL :
            single_elt_class = OBIDMS_column_qual
            multi_elts_class = OBIDMS_column_multi_elts_qual
        elif col_type == OBI_STR :
            single_elt_class = OBIDMS_column_str
            multi_elts_class = OBIDMS_column_multi_elts_str
        elif col_type == OBI_SEQ :
            single_elt_class = OBIDMS_column_seq
            multi_elts_class = OBIDMS_column_multi_elts_seq
        else :
            raise Exception("Problem with the data type")

        # Then choose between them according to the number of elements per line
        if header.nb_elements_per_line == 1 :
            return single_elt_class
        return multi_elts_class
######################################################################################################
cdef class OBIDMS_column_multi_elts(OBIDMS_column) :
    """
    Column whose lines are read and written element by element.

    Indexing the column returns an OBIDMS_column_line bound to the requested line
    instead of the line content itself.
    """

    def __getitem__(self, index_t line_nb):
        """Return an OBIDMS_column_line giving access to the elements of line `line_nb`."""
        return OBIDMS_column_line(self, line_nb)

    cpdef set_line(self, index_t line_nb, dict values):
        """Set, in line `line_nb`, each element named by a key of `values` to the associated value."""
        # items() gives the name and the value together, avoiding a second lookup per element
        for element_name, value in values.items() :
            self.set_item(line_nb, element_name, value)
######################################################################################################
cdef class OBIDMS_column_line :
    """
    One line of a column, whose elements are accessed by name.

    Every access is forwarded to the column with the index of the line.
    """

    def __init__(self, OBIDMS_column column, index_t line_nb) :
        """Remember the column and the index of the line in it."""
        self._column = column
        self._index = line_nb

    def __getitem__(self, str element_name) :
        """Return the element `element_name` of the line, read with the column's get_item()."""
        line_nb = self._index
        return self._column.get_item(line_nb, element_name)

    def __setitem__(self, str element_name, object value):
        """Set the element `element_name` of the line to `value` with the column's set_item()."""
        line_nb = self._index
        self._column.set_item(line_nb, element_name, value)

    def __contains__(self, str element_name):
        """Tell whether `element_name` is one of the element names of the column."""
        known_names = self._column.elements_names
        return element_name in known_names

    def __repr__(self) :
        """Return the str conversion of the whole line as given by the column's get_line()."""
        whole_line = self._column.get_line(self._index)
        return str(whole_line)
######################################################################################################
@ -277,84 +47,6 @@ cdef class OBIDMS_column_line :
######################################################################################################
cdef class OBIView_line :
    """
    One line of a view, whose values are accessed by column name.

    Reading line[column_name] returns the value found at the index of the line in
    that column. Assigning to a column that is not in the view yet first adds the
    column to the view, with a type guessed from the assigned value.
    """

    def __init__(self, OBIView view, index_t line_nb) :
        """Remember the view and the index of the line in it."""
        self._index = line_nb
        self._view = view

    def __getitem__(self, str column_name) :
        """Return the value stored at the index of the line in the column `column_name`."""
        return self._view._columns[column_name][self._index]

    def __setitem__(self, str column_name, object value):
        """
        Set the value of the column `column_name` for this line.

        If the column is not in the view, it is added first with a type guessed
        from `value`: int, float, bool, or str/bytes (sequence, single character
        or string).
        Raises an Exception if `value` is None or if no type can be guessed for it.
        """
        # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get)
        # TODO OBI_QUAL ?
        cdef type value_type
        cdef str value_obitype = None
        cdef bytes value_b

        if column_name not in self._view :
            # Identity test: '==' would go through a user-defined __eq__
            if value is None :
                raise Exception("Trying to create a column from a None value (can't guess type)")
            value_type = type(value)
            if value_type == int :
                value_obitype = 'OBI_INT'
            elif value_type == float :
                value_obitype = 'OBI_FLOAT'
            elif value_type == bool :
                value_obitype = 'OBI_BOOL'
            elif value_type == str or value_type == bytes :
                if value_type == str :
                    value_b = str2bytes(value)
                else :
                    value_b = value
                if only_ATGC(value_b) :  # TODO detect IUPAC
                    value_obitype = 'OBI_SEQ'
                elif len(value) == 1 :
                    value_obitype = 'OBI_CHAR'
                elif len(value) > 1 :
                    value_obitype = 'OBI_STR'
            # Any value for which no branch above chose a type (unsupported Python type,
            # or a str/bytes value matching none of the three cases) ends up here,
            # so a column is never created without a type.
            if value_obitype is None :
                raise Exception("Could not guess the type of a value to create a new column")
            self._view.add_column(column_name, type=value_obitype)

        self._view._columns[column_name].set_line(self._index, value)

    def __iter__(self):
        """Yield the name of each column of the view."""
        for column_name in self._view._columns :
            yield column_name

    def __contains__(self, str column_name):
        """Tell whether `column_name` is the name of a column of the view."""
        return (column_name in self._view._columns)

    def __repr__(self):
        """Return the str conversion of a dict mapping each column name to the value of this line."""
        return str({column_name: self[column_name] for column_name in self._view._columns})
######################################################################################################
cdef class OBIView_line_selection(list):
    """
    List of the indexes of the lines selected in a view.

    The selection keeps the view and its name, and refuses indexes that are not
    below the line count of the view.
    """

    def __init__(self, OBIView view) :
        """
        Create an empty selection on `view`.

        Raises an Exception if the pointer of the view is NULL.
        """
        if view._pointer == NULL:
            raise Exception("Error: trying to create a line selection with an invalidated view")
        self._view = view
        self._view_name = view.name

    def append(self, index_t idx) :
        """
        Add the line index `idx` at the end of the selection.

        Raises an Exception if `idx` is not below the line count of the view.
        """
        line_count = self._view.line_count
        if idx >= line_count :
            raise Exception("Error: trying to select a line beyond the line count of a view")
        # TODO discuss whether an index already in the selection should be skipped. Discuss order too
        list.append(self, idx)
######################################################################################################

View File

@ -0,0 +1,65 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
../../../src/obi_align.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_blob.c
../../../src/obidmscolumn_blob.h
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c
../../../src/uint8_indexer.h
../../../src/uint8_indexer.c
../../../src/upperband.h
../../../src/upperband.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,52 @@
#cython: language_level=3
from .capi.obidmscolumn cimport OBIDMS_column_p
from .capi.obiview cimport Obiview_p
from .capi.obitypes cimport obiversion_t, OBIType_t, index_t
from ._obidmscolumn_int cimport OBIDMS_column_int, \
OBIDMS_column_multi_elts_int
from ._obidmscolumn_float cimport OBIDMS_column_float, \
OBIDMS_column_multi_elts_float
from ._obidmscolumn_bool cimport OBIDMS_column_bool, \
OBIDMS_column_multi_elts_bool
from ._obidmscolumn_char cimport OBIDMS_column_char, \
OBIDMS_column_multi_elts_char
from ._obidmscolumn_qual cimport OBIDMS_column_qual, \
OBIDMS_column_multi_elts_qual
from ._obidmscolumn_str cimport OBIDMS_column_str, \
OBIDMS_column_multi_elts_str
from ._obidmscolumn_seq cimport OBIDMS_column_seq, \
OBIDMS_column_multi_elts_seq
from ._obiview cimport OBIView
# Declaration of the base column extension type: the alias of the column,
# the pointer used to reach the C column, and the view the column is taken from.
cdef class OBIDMS_column:
cdef str _alias # associated property: alias
# Pointer on the C column pointer; it is dereferenced with [0] to reach the column
cdef OBIDMS_column_p* _pointer
# View given to the constructor
cdef OBIView _view
# Closes the C column; raises an Exception if closing fails
cpdef close(self)
# Returns the column class matching the data type and the number of elements per line of column_p
@staticmethod
cdef object get_subclass_type(OBIDMS_column_p column_p)
# Column whose lines are accessed element by element
# (presumably used when a line holds several elements -- TODO confirm).
cdef class OBIDMS_column_multi_elts(OBIDMS_column):
# Sets the elements of line line_nb from a dict mapping element names to values
cpdef set_line(self, index_t line_nb, dict values)
# One line of a column: the column and the index of the line in that column.
cdef class OBIDMS_column_line:
# Column the line is read from and written to
cdef OBIDMS_column _column
# Index of the line in the column
cdef index_t _index

View File

@ -0,0 +1,191 @@
#cython: language_level=3
cdef class OBIDMS_column :
    """
    Base class wrapping a column of a view.

    An instance keeps the alias of the column, the view it was obtained from and
    the pointer returned by obi_view_get_pointer_on_column_in_view(). That pointer
    is dereferenced on every access to reach the C column and its header.

    Note: should only be initialized through a subclass. get_line() and set_line()
    are called by this class but are not defined in it.
    """

    def __init__(self, OBIView view, str column_alias):
        """
        Bind the instance to the column known as `column_alias` in `view`.

        Raises an Exception if no pointer on the column could be obtained.
        """
        cdef OBIDMS_column_p* column_pp

        column_pp = obi_view_get_pointer_on_column_in_view(view._pointer, str2bytes(column_alias))
        # NOTE(review): assumes the C function returns NULL on failure -- confirm.
        # Without this check, a NULL pointer would be dereferenced by every accessor below.
        if column_pp == NULL :
            raise Exception("Problem getting a pointer on a column in a view")

        # Fill structure
        self._alias = column_alias
        self._pointer = column_pp
        self._view = view

    def __setitem__(self, index_t line_nb, object value):
        """Set the line `line_nb` to `value` through set_line()."""
        self.set_line(line_nb, value)

    def __getitem__(self, index_t line_nb):
        """Return the line `line_nb` through get_line()."""
        return self.get_line(line_nb)

    def __len__(self):
        """Return the number of lines used in the column."""
        return self.lines_used

    def __sizeof__(self):
        """Return the sum of the header size and the data size stored in the column header."""
        return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size)

    def __iter__(self):
        """Yield each used line of the column, in index order."""
        # Declarations
        cdef index_t line_nb

        # Yield each line
        for line_nb in range(self.lines_used):
            yield self.get_line(line_nb)

    def __str__(self) :
        """Return every line of the column converted to str, one per text line."""
        # join() builds the result in one pass instead of reallocating the string for each line
        return "".join([str(line) + "\n" for line in self])

    def __repr__(self) :
        """Return the alias, original name, version and data type of the column."""
        return "{}, original name: {}, version {}, data type: {}".format(self._alias,
                                                                         self.original_name,
                                                                         self.version,
                                                                         self.data_type)

    cpdef close(self):
        """Close the C column. Raises an Exception if obi_close_column() reports an error."""
        if obi_close_column((self._pointer)[0]) < 0 :
            raise Exception("Problem closing a column")

    # Column alias property getter and setter
    @property
    def alias(self):
        """Alias of the column; setting it delegates to the view's change_column_alias()."""
        return self._alias

    @alias.setter
    def alias(self, new_alias):  # @DuplicatedSignature
        self._view.change_column_alias(self._alias, new_alias)

    # elements_names property getter
    @property
    def elements_names(self):
        """List of element names, obtained by splitting the header string on ';'."""
        return (bytes2str(((self._pointer)[0].header).elements_names)).split(';')

    # nb_elements_per_line property getter
    @property
    def nb_elements_per_line(self):
        """Number of elements per line stored in the column header."""
        return ((self._pointer)[0].header).nb_elements_per_line

    # data_type property getter
    @property
    def data_type(self):
        """Name given by name_data_type() to the returned data type of the column."""
        return bytes2str(name_data_type(((self._pointer)[0].header).returned_data_type))

    # original_name property getter
    @property
    def original_name(self):
        """Name stored in the column header."""
        return bytes2str(((self._pointer)[0].header).name)

    # version property getter
    @property
    def version(self):
        """Version stored in the column header."""
        return ((self._pointer)[0].header).version

    # lines_used property getter
    @property
    def lines_used(self):
        """Number of lines used stored in the column header."""
        return (self._pointer)[0].header.lines_used

    # comments property getter
    @property
    def comments(self):
        """Comments stored in the column header."""
        return bytes2str((self._pointer)[0].header.comments)

    # creation_date property getter
    @property
    def creation_date(self):
        """Creation date of the column header, formatted by obi_format_date()."""
        return bytes2str(obi_format_date((self._pointer)[0].header.creation_date))

    @staticmethod
    cdef object get_subclass_type(OBIDMS_column_p column_p) :
        """
        Return the column class to use for the C column `column_p`.

        The class is chosen from the returned data type of the column and from
        whether its lines hold exactly one element or not.
        Raises an Exception if the data type is not one of the handled types.
        """
        cdef object single_elt_class
        cdef object multi_elts_class
        cdef OBIDMS_column_header_p header
        cdef OBIType_t col_type

        header = column_p.header
        col_type = header.returned_data_type

        # Pick the pair of candidate classes for the data type
        if col_type == OBI_INT :
            single_elt_class = OBIDMS_column_int
            multi_elts_class = OBIDMS_column_multi_elts_int
        elif col_type == OBI_FLOAT :
            single_elt_class = OBIDMS_column_float
            multi_elts_class = OBIDMS_column_multi_elts_float
        elif col_type == OBI_BOOL :
            single_elt_class = OBIDMS_column_bool
            multi_elts_class = OBIDMS_column_multi_elts_bool
        elif col_type == OBI_CHAR :
            single_elt_class = OBIDMS_column_char
            multi_elts_class = OBIDMS_column_multi_elts_char
        elif col_type == OBI_QUAL :
            single_elt_class = OBIDMS_column_qual
            multi_elts_class = OBIDMS_column_multi_elts_qual
        elif col_type == OBI_STR :
            single_elt_class = OBIDMS_column_str
            multi_elts_class = OBIDMS_column_multi_elts_str
        elif col_type == OBI_SEQ :
            single_elt_class = OBIDMS_column_seq
            multi_elts_class = OBIDMS_column_multi_elts_seq
        else :
            raise Exception("Problem with the data type")

        # Then choose between them according to the number of elements per line
        if header.nb_elements_per_line == 1 :
            return single_elt_class
        return multi_elts_class
######################################################################################################
cdef class OBIDMS_column_multi_elts(OBIDMS_column) :
    """
    Column whose lines are read and written element by element.

    Indexing the column returns an OBIDMS_column_line bound to the requested line
    instead of the line content itself.
    """

    def __getitem__(self, index_t line_nb):
        """Return an OBIDMS_column_line giving access to the elements of line `line_nb`."""
        return OBIDMS_column_line(self, line_nb)

    cpdef set_line(self, index_t line_nb, dict values):
        """Set, in line `line_nb`, each element named by a key of `values` to the associated value."""
        # items() gives the name and the value together, avoiding a second lookup per element
        for element_name, value in values.items() :
            self.set_item(line_nb, element_name, value)
######################################################################################################
cdef class OBIDMS_column_line :
    """
    One line of a column, whose elements are accessed by name.

    Every access is forwarded to the column with the index of the line.
    """

    def __init__(self, OBIDMS_column column, index_t line_nb) :
        """Remember the column and the index of the line in it."""
        self._column = column
        self._index = line_nb

    def __getitem__(self, str element_name) :
        """Return the element `element_name` of the line, read with the column's get_item()."""
        line_nb = self._index
        return self._column.get_item(line_nb, element_name)

    def __setitem__(self, str element_name, object value):
        """Set the element `element_name` of the line to `value` with the column's set_item()."""
        line_nb = self._index
        self._column.set_item(line_nb, element_name, value)

    def __contains__(self, str element_name):
        """Tell whether `element_name` is one of the element names of the column."""
        known_names = self._column.elements_names
        return element_name in known_names

    def __repr__(self) :
        """Return the str conversion of the whole line as given by the column's get_line()."""
        whole_line = self._column.get_line(self._index)
        return str(whole_line)
######################################################################################################

View File

@ -1,5 +1,27 @@
#cython: language_level=3
from .capi.obiview cimport Obiview_p, \
Obiview_infos_p, \
Alias_column_pair_p, \
obi_new_view_nuc_seqs, \
obi_new_view, \
obi_new_view_cloned_from_name, \
obi_new_view_nuc_seqs_cloned_from_name, \
obi_view_map_file, \
obi_view_unmap_file, \
obi_open_view, \
obi_view_delete_column, \
obi_view_add_column, \
obi_view_create_column_alias, \
obi_view_get_column, \
obi_view_get_pointer_on_column_in_view, \
obi_save_and_close_view, \
VIEW_TYPE_NUC_SEQS, \
NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
QUALITY_COLUMN
cdef class OBIView:
cdef Obiview_p _pointer
@ -52,3 +74,9 @@ cdef class OBIView_line_selection(list):
str view_name,
str comments=*)
# One line of a view: the view and the index of the line in that view.
cdef class OBIView_line :
# Index of the line in the columns of the view
cdef index_t _index
# View the line is read from and written to
cdef OBIView _view

View File

@ -322,3 +322,63 @@ cdef class OBIView_line_selection(list):
return view
cdef class OBIView_line :
    """
    One line of a view, whose values are accessed by column name.

    Reading line[column_name] returns the value found at the index of the line in
    that column. Assigning to a column that is not in the view yet first adds the
    column to the view, with a type guessed from the assigned value.
    """

    def __init__(self, OBIView view, index_t line_nb) :
        """Remember the view and the index of the line in it."""
        self._index = line_nb
        self._view = view

    def __getitem__(self, str column_name) :
        """Return the value stored at the index of the line in the column `column_name`."""
        return self._view._columns[column_name][self._index]

    def __setitem__(self, str column_name, object value):
        """
        Set the value of the column `column_name` for this line.

        If the column is not in the view, it is added first with a type guessed
        from `value`: int, float, bool, or str/bytes (sequence, single character
        or string).
        Raises an Exception if `value` is None or if no type can be guessed for it.
        """
        # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get)
        # TODO OBI_QUAL ?
        cdef type value_type
        cdef str value_obitype = None
        cdef bytes value_b

        if column_name not in self._view :
            # Identity test: '==' would go through a user-defined __eq__
            if value is None :
                raise Exception("Trying to create a column from a None value (can't guess type)")
            value_type = type(value)
            if value_type == int :
                value_obitype = 'OBI_INT'
            elif value_type == float :
                value_obitype = 'OBI_FLOAT'
            elif value_type == bool :
                value_obitype = 'OBI_BOOL'
            elif value_type == str or value_type == bytes :
                if value_type == str :
                    value_b = str2bytes(value)
                else :
                    value_b = value
                if only_ATGC(value_b) :  # TODO detect IUPAC
                    value_obitype = 'OBI_SEQ'
                elif len(value) == 1 :
                    value_obitype = 'OBI_CHAR'
                elif len(value) > 1 :
                    value_obitype = 'OBI_STR'
            # Any value for which no branch above chose a type (unsupported Python type,
            # or a str/bytes value matching none of the three cases) ends up here,
            # so a column is never created without a type.
            if value_obitype is None :
                raise Exception("Could not guess the type of a value to create a new column")
            self._view.add_column(column_name, type=value_obitype)

        self._view._columns[column_name].set_line(self._index, value)

    def __iter__(self):
        """Yield the name of each column of the view."""
        for column_name in self._view._columns :
            yield column_name

    def __contains__(self, str column_name):
        """Tell whether `column_name` is the name of a column of the view."""
        return (column_name in self._view._columns)

    def __repr__(self):
        """Return the str conversion of a dict mapping each column name to the value of this line."""
        return str({column_name: self[column_name] for column_name in self._view._columns})