diff --git a/python/obitools3/obidms/_obidms.pxd b/python/obitools3/obidms/_obidms.pxd index 85866c7..9777c16 100644 --- a/python/obitools3/obidms/_obidms.pxd +++ b/python/obitools3/obidms/_obidms.pxd @@ -7,42 +7,6 @@ from .capi.obitypes cimport obiversion_t, OBIType_t, index_t from ._obitaxo cimport OBI_Taxonomy -cdef class OBIDMS_column: - - cdef str _alias # associated property: alias - cdef OBIDMS_column_p* _pointer - cdef OBIView _view - - cpdef close(self) - - @staticmethod - cdef object get_subclass_type(OBIDMS_column_p column_p) - - -cdef class OBIDMS_column_multi_elts(OBIDMS_column): - - cpdef set_line(self, index_t line_nb, dict values) - - -cdef class OBIDMS_column_line: - - cdef OBIDMS_column _column - cdef index_t _index - - - - -cdef class OBIView_line : - - cdef index_t _index - cdef OBIView _view - - -cdef class OBIView_line_selection(list): - - cdef OBIView _view - cdef str _view_name - cdef class OBIDMS: diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx index 1d35009..2bc083b 100644 --- a/python/obitools3/obidms/_obidms.pyx +++ b/python/obitools3/obidms/_obidms.pyx @@ -30,243 +30,13 @@ from ._obidms cimport OBIDMS, \ from ._obitaxo cimport OBI_Taxonomy -from ._obidmscolumn_int cimport OBIDMS_column_int, \ - OBIDMS_column_multi_elts_int - -from ._obidmscolumn_float cimport OBIDMS_column_float, \ - OBIDMS_column_multi_elts_float -from ._obidmscolumn_bool cimport OBIDMS_column_bool, \ - OBIDMS_column_multi_elts_bool - -from ._obidmscolumn_char cimport OBIDMS_column_char, \ - OBIDMS_column_multi_elts_char - -from ._obidmscolumn_qual cimport OBIDMS_column_qual, \ - OBIDMS_column_multi_elts_qual - -from ._obidmscolumn_str cimport OBIDMS_column_str, \ - OBIDMS_column_multi_elts_str - -from ._obidmscolumn_seq cimport OBIDMS_column_seq, \ - OBIDMS_column_multi_elts_seq - -from .capi.obiview cimport Obiview_p, \ - Obiview_infos_p, \ - Alias_column_pair_p, \ - obi_new_view_nuc_seqs, \ - obi_new_view, \ - obi_new_view_cloned_from_name, \ - obi_new_view_nuc_seqs_cloned_from_name, \ - obi_view_map_file, \ - obi_view_unmap_file, \ - obi_open_view, \ - obi_view_delete_column, \ - obi_view_add_column, \ - obi_view_create_column_alias, \ - obi_view_get_column, \ - obi_view_get_pointer_on_column_in_view, \ - obi_save_and_close_view, \ - VIEW_TYPE_NUC_SEQS, \ - NUC_SEQUENCE_COLUMN, \ - ID_COLUMN, \ - DEFINITION_COLUMN, \ - QUALITY_COLUMN from libc.stdlib cimport malloc -cdef class OBIDMS_column : - - # Note: should only be initialized through a subclass - def __init__(self, OBIView view, str column_alias): - - cdef OBIDMS_column_p column_p - cdef OBIDMS_column_p* column_pp - - column_pp = obi_view_get_pointer_on_column_in_view(view._pointer, str2bytes(column_alias)) - column_p = column_pp[0] # TODO ugly cython dereferencing but can't find better - - # Fill structure - self._alias = column_alias - self._pointer = column_pp - self._view = view - - def __setitem__(self, index_t line_nb, object value): - self.set_line(line_nb, value) - - def __getitem__(self, index_t line_nb): - return self.get_line(line_nb) - - def __len__(self): - return self.lines_used - - def __sizeof__(self): - return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size) - - def __iter__(self): - # Declarations - cdef index_t line_nb - # Yield each line - for line_nb in range(self.lines_used): - yield self.get_line(line_nb) - - def __str__(self) : - cdef str to_print - to_print = '' - for line in self : - to_print = to_print + str(line) + "\n" - return to_print - - def __repr__(self) : - return (self._alias + ", original name: " + self.original_name + ", version " + str(self.version) + ", data type: " + self.data_type) - - cpdef close(self): - if obi_close_column((self._pointer)[0]) < 0 : - raise Exception("Problem closing a column") - - # Column alias property getter and setter - @property - def alias(self): - return self._alias - @alias.setter - def alias(self, new_alias): # @DuplicatedSignature - self._view.change_column_alias(self._alias, new_alias) - - # elements_names property getter - @property - def elements_names(self): - return (bytes2str(((self._pointer)[0].header).elements_names)).split(';') - - # nb_elements_per_line property getter - @property - def nb_elements_per_line(self): - return ((self._pointer)[0].header).nb_elements_per_line - - # data_type property getter - @property - def data_type(self): - return bytes2str(name_data_type(((self._pointer)[0].header).returned_data_type)) - - # original_name property getter - @property - def original_name(self): - return bytes2str(((self._pointer)[0].header).name) - - # version property getter - @property - def version(self): - return ((self._pointer)[0].header).version - - # lines_used property getter - @property - def lines_used(self): - return (self._pointer)[0].header.lines_used - - # comments property getter - @property - def comments(self): - return bytes2str((self._pointer)[0].header.comments) - - # creation_date property getter - @property - def creation_date(self): - return bytes2str(obi_format_date((self._pointer)[0].header.creation_date)) - - @staticmethod - cdef object get_subclass_type(OBIDMS_column_p column_p) : - - cdef object subclass - cdef OBIDMS_column_header_p header - cdef OBIType_t col_type - cdef bint col_writable - cdef bint col_one_element_per_line - - header = column_p.header - col_type = header.returned_data_type - col_writable = column_p.writable - col_one_element_per_line = ((header.nb_elements_per_line) == 1) - - if col_type == OBI_INT : - if col_one_element_per_line : - subclass = OBIDMS_column_int - else : - subclass = OBIDMS_column_multi_elts_int - elif col_type == OBI_FLOAT : - if col_one_element_per_line : - subclass = OBIDMS_column_float - else : - subclass = OBIDMS_column_multi_elts_float - elif col_type == OBI_BOOL : - if col_one_element_per_line : - subclass = OBIDMS_column_bool - else : - subclass = OBIDMS_column_multi_elts_bool - elif col_type == OBI_CHAR : - if col_one_element_per_line : - subclass = OBIDMS_column_char - else : - subclass = OBIDMS_column_multi_elts_char - elif col_type == OBI_QUAL : - if col_one_element_per_line : - subclass = OBIDMS_column_qual - else : - subclass = OBIDMS_column_multi_elts_qual - elif col_type == OBI_STR : - if col_one_element_per_line : - subclass = OBIDMS_column_str - else : - subclass = OBIDMS_column_multi_elts_str - elif col_type == OBI_SEQ : - if col_one_element_per_line : - subclass = OBIDMS_column_seq - else : - subclass = OBIDMS_column_multi_elts_seq - else : - raise Exception("Problem with the data type") - - return subclass - - -###################################################################################################### - - -cdef class OBIDMS_column_multi_elts(OBIDMS_column) : - - def __getitem__(self, index_t line_nb): - return OBIDMS_column_line(self, line_nb) - - cpdef set_line(self, index_t line_nb, dict values): - for element_name in values : - self.set_item(line_nb, element_name, values[element_name]) - - -###################################################################################################### - - -cdef class OBIDMS_column_line : - - def __init__(self, OBIDMS_column column, index_t line_nb) : - self._index = line_nb - self._column = column - - def __getitem__(self, str element_name) : - return self._column.get_item(self._index, element_name) - - def __setitem__(self, str element_name, object value): - self._column.set_item(self._index, element_name, value) - - def __contains__(self, str element_name): - return (element_name in self._column.elements_names) - - def __repr__(self) : - return str(self._column.get_line(self._index)) - - -###################################################################################################### - @@ -277,84 +47,6 @@ cdef class OBIDMS_column_line : ###################################################################################################### -cdef class OBIView_line : - - def __init__(self, OBIView view, index_t line_nb) : - self._index = line_nb - self._view = view - - def __getitem__(self, str column_name) : - return ((self._view)._columns)[column_name][self._index] - - def __setitem__(self, str column_name, object value): - # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get) - # TODO OBI_QUAL ? - cdef type value_type - cdef str value_obitype - cdef bytes value_b - - if column_name not in self._view : - if value == None : - raise Exception("Trying to create a column from a None value (can't guess type)") - value_type = type(value) - if value_type == int : - value_obitype = 'OBI_INT' - elif value_type == float : - value_obitype = 'OBI_FLOAT' - elif value_type == bool : - value_obitype = 'OBI_BOOL' - elif value_type == str or value_type == bytes : - if value_type == str : - value_b = str2bytes(value) - else : - value_b = value - if only_ATGC(value_b) : # TODO detect IUPAC - value_obitype = 'OBI_SEQ' - elif len(value) == 1 : - value_obitype = 'OBI_CHAR' - elif (len(value) > 1) : - value_obitype = 'OBI_STR' - else : - raise Exception("Could not guess the type of a value to create a new column") - self._view.add_column(column_name, type=value_obitype) - - (((self._view)._columns)[column_name]).set_line(self._index, value) - - def __iter__(self): - for column_name in ((self._view)._columns) : - yield column_name - - def __contains__(self, str column_name): - return (column_name in self._view._columns) - - def __repr__(self): - cdef dict line - cdef str column_name - line = {} - for column_name in self._view._columns : - line[column_name] = self[column_name] - return str(line) - - -###################################################################################################### - - -cdef class OBIView_line_selection(list): - - def __init__(self, OBIView view) : - if view._pointer == NULL: - raise Exception("Error: trying to create a line selection with an invalidated view") - self._view = view - self._view_name = view.name - - def append(self, index_t idx) : - if idx >= self._view.line_count : - raise Exception("Error: trying to select a line beyond the line count of a view") -# if idx in self : # TODO discuss. Discuss order too -# pass -# else : - super(OBIView_line_selection, self).append(idx) - ###################################################################################################### diff --git a/python/obitools3/obidms/_obidmscolumn.cfiles b/python/obitools3/obidms/_obidmscolumn.cfiles new file mode 100644 index 0000000..84e0436 --- /dev/null +++ b/python/obitools3/obidms/_obidmscolumn.cfiles @@ -0,0 +1,65 @@ +../../../src/bloom.h +../../../src/bloom.c +../../../src/char_str_indexer.h +../../../src/char_str_indexer.c +../../../src/crc64.h +../../../src/crc64.c +../../../src/dna_seq_indexer.h +../../../src/dna_seq_indexer.c +../../../src/encode.h +../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c +../../../src/murmurhash2.h +../../../src/murmurhash2.c +../../../src/obi_align.h +../../../src/obi_align.c +../../../src/obiavl.h +../../../src/obiavl.c +../../../src/obiblob_indexer.h +../../../src/obiblob_indexer.c +../../../src/obiblob.h +../../../src/obiblob.c +../../../src/obidebug.h +../../../src/obidms_taxonomy.h +../../../src/obidms_taxonomy.c +../../../src/obidms.h +../../../src/obidms.c +../../../src/obidmscolumn_blob.c +../../../src/obidmscolumn_blob.h +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h +../../../src/obidmscolumn_idx.h +../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h +../../../src/obidmscolumn.h +../../../src/obidmscolumn.c +../../../src/obidmscolumndir.h +../../../src/obidmscolumndir.c +../../../src/obierrno.h +../../../src/obierrno.c +../../../src/obilittlebigman.h +../../../src/obilittlebigman.c +../../../src/obitypes.h +../../../src/obitypes.c +../../../src/obiview.h +../../../src/obiview.c +../../../src/sse_banded_LCS_alignment.h +../../../src/sse_banded_LCS_alignment.c +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/upperband.h +../../../src/upperband.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obidmscolumn.pxd b/python/obitools3/obidms/_obidmscolumn.pxd new file mode 100644 index 0000000..8ffaf9f --- /dev/null +++ b/python/obitools3/obidms/_obidmscolumn.pxd @@ -0,0 +1,52 @@ +#cython: language_level=3 + +from .capi.obidmscolumn cimport OBIDMS_column_p +from .capi.obiview cimport Obiview_p +from .capi.obitypes cimport obiversion_t, OBIType_t, index_t + +from ._obidmscolumn_int cimport OBIDMS_column_int, \ + OBIDMS_column_multi_elts_int + +from ._obidmscolumn_float cimport OBIDMS_column_float, \ + OBIDMS_column_multi_elts_float + +from ._obidmscolumn_bool cimport OBIDMS_column_bool, \ + OBIDMS_column_multi_elts_bool + +from ._obidmscolumn_char cimport OBIDMS_column_char, \ + OBIDMS_column_multi_elts_char + +from ._obidmscolumn_qual cimport OBIDMS_column_qual, \ + OBIDMS_column_multi_elts_qual + +from ._obidmscolumn_str cimport OBIDMS_column_str, \ + OBIDMS_column_multi_elts_str + +from ._obidmscolumn_seq cimport OBIDMS_column_seq, \ + OBIDMS_column_multi_elts_seq + + +from ._obiview cimport OBIView + +cdef class OBIDMS_column: + + cdef str _alias # associated property: alias + cdef OBIDMS_column_p* _pointer + cdef OBIView _view + + cpdef close(self) + + @staticmethod + cdef object get_subclass_type(OBIDMS_column_p column_p) + + +cdef class OBIDMS_column_multi_elts(OBIDMS_column): + + cpdef set_line(self, index_t line_nb, dict values) + + +cdef class OBIDMS_column_line: + + cdef OBIDMS_column _column + cdef index_t _index + diff --git a/python/obitools3/obidms/_obidmscolumn.pyx b/python/obitools3/obidms/_obidmscolumn.pyx new file mode 100644 index 0000000..b7ffbd3 --- /dev/null +++ b/python/obitools3/obidms/_obidmscolumn.pyx @@ -0,0 +1,191 @@ +#cython: language_level=3 + +cdef class OBIDMS_column : + + # Note: should only be initialized through a subclass + def __init__(self, OBIView view, str column_alias): + + cdef OBIDMS_column_p column_p + cdef OBIDMS_column_p* column_pp + + column_pp = obi_view_get_pointer_on_column_in_view(view._pointer, str2bytes(column_alias)) + column_p = column_pp[0] # TODO ugly cython dereferencing but can't find better + + # Fill structure + self._alias = column_alias + self._pointer = column_pp + self._view = view + + def __setitem__(self, index_t line_nb, object value): + self.set_line(line_nb, value) + + def __getitem__(self, index_t line_nb): + return self.get_line(line_nb) + + def __len__(self): + return self.lines_used + + def __sizeof__(self): + return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size) + + def __iter__(self): + # Declarations + cdef index_t line_nb + # Yield each line + for line_nb in range(self.lines_used): + yield self.get_line(line_nb) + + def __str__(self) : + cdef str to_print + to_print = '' + for line in self : + to_print = to_print + str(line) + "\n" + return to_print + + def __repr__(self) : + return (self._alias + ", original name: " + self.original_name + ", version " + str(self.version) + ", data type: " + self.data_type) + + cpdef close(self): + if obi_close_column((self._pointer)[0]) < 0 : + raise Exception("Problem closing a column") + + # Column alias property getter and setter + @property + def alias(self): + return self._alias + @alias.setter + def alias(self, new_alias): # @DuplicatedSignature + self._view.change_column_alias(self._alias, new_alias) + + # elements_names property getter + @property + def elements_names(self): + return (bytes2str(((self._pointer)[0].header).elements_names)).split(';') + + # nb_elements_per_line property getter + @property + def nb_elements_per_line(self): + return ((self._pointer)[0].header).nb_elements_per_line + + # data_type property getter + @property + def data_type(self): + return bytes2str(name_data_type(((self._pointer)[0].header).returned_data_type)) + + # original_name property getter + @property + def original_name(self): + return bytes2str(((self._pointer)[0].header).name) + + # version property getter + @property + def version(self): + return ((self._pointer)[0].header).version + + # lines_used property getter + @property + def lines_used(self): + return (self._pointer)[0].header.lines_used + + # comments property getter + @property + def comments(self): + return bytes2str((self._pointer)[0].header.comments) + + # creation_date property getter + @property + def creation_date(self): + return bytes2str(obi_format_date((self._pointer)[0].header.creation_date)) + + @staticmethod + cdef object get_subclass_type(OBIDMS_column_p column_p) : + + cdef object subclass + cdef OBIDMS_column_header_p header + cdef OBIType_t col_type + cdef bint col_writable + cdef bint col_one_element_per_line + + header = column_p.header + col_type = header.returned_data_type + col_writable = column_p.writable + col_one_element_per_line = ((header.nb_elements_per_line) == 1) + + if col_type == OBI_INT : + if col_one_element_per_line : + subclass = OBIDMS_column_int + else : + subclass = OBIDMS_column_multi_elts_int + elif col_type == OBI_FLOAT : + if col_one_element_per_line : + subclass = OBIDMS_column_float + else : + subclass = OBIDMS_column_multi_elts_float + elif col_type == OBI_BOOL : + if col_one_element_per_line : + subclass = OBIDMS_column_bool + else : + subclass = OBIDMS_column_multi_elts_bool + elif col_type == OBI_CHAR : + if col_one_element_per_line : + subclass = OBIDMS_column_char + else : + subclass = OBIDMS_column_multi_elts_char + elif col_type == OBI_QUAL : + if col_one_element_per_line : + subclass = OBIDMS_column_qual + else : + subclass = OBIDMS_column_multi_elts_qual + elif col_type == OBI_STR : + if col_one_element_per_line : + subclass = OBIDMS_column_str + else : + subclass = OBIDMS_column_multi_elts_str + elif col_type == OBI_SEQ : + if col_one_element_per_line : + subclass = OBIDMS_column_seq + else : + subclass = OBIDMS_column_multi_elts_seq + else : + raise Exception("Problem with the data type") + + return subclass + + +###################################################################################################### + + +cdef class OBIDMS_column_multi_elts(OBIDMS_column) : + + def __getitem__(self, index_t line_nb): + return OBIDMS_column_line(self, line_nb) + + cpdef set_line(self, index_t line_nb, dict values): + for element_name in values : + self.set_item(line_nb, element_name, values[element_name]) + + +###################################################################################################### + + +cdef class OBIDMS_column_line : + + def __init__(self, OBIDMS_column column, index_t line_nb) : + self._index = line_nb + self._column = column + + def __getitem__(self, str element_name) : + return self._column.get_item(self._index, element_name) + + def __setitem__(self, str element_name, object value): + self._column.set_item(self._index, element_name, value) + + def __contains__(self, str element_name): + return (element_name in self._column.elements_names) + + def __repr__(self) : + return str(self._column.get_line(self._index)) + + +###################################################################################################### + diff --git a/python/obitools3/obidms/_obiview.pxd b/python/obitools3/obidms/_obiview.pxd index f49bfc7..ecbac30 100644 --- a/python/obitools3/obidms/_obiview.pxd +++ b/python/obitools3/obidms/_obiview.pxd @@ -1,5 +1,27 @@ #cython: language_level=3 +from .capi.obiview cimport Obiview_p, \ + Obiview_infos_p, \ + Alias_column_pair_p, \ + obi_new_view_nuc_seqs, \ + obi_new_view, \ + obi_new_view_cloned_from_name, \ + obi_new_view_nuc_seqs_cloned_from_name, \ + obi_view_map_file, \ + obi_view_unmap_file, \ + obi_open_view, \ + obi_view_delete_column, \ + obi_view_add_column, \ + obi_view_create_column_alias, \ + obi_view_get_column, \ + obi_view_get_pointer_on_column_in_view, \ + obi_save_and_close_view, \ + VIEW_TYPE_NUC_SEQS, \ + NUC_SEQUENCE_COLUMN, \ + ID_COLUMN, \ + DEFINITION_COLUMN, \ + QUALITY_COLUMN + cdef class OBIView: cdef Obiview_p _pointer @@ -52,3 +74,9 @@ cdef class OBIView_line_selection(list): str view_name, str comments=*) +cdef class OBIView_line : + + cdef index_t _index + cdef OBIView _view + + diff --git a/python/obitools3/obidms/_obiview.pyx b/python/obitools3/obidms/_obiview.pyx index 9742e2d..519e441 100644 --- a/python/obitools3/obidms/_obiview.pyx +++ b/python/obitools3/obidms/_obiview.pyx @@ -322,3 +322,63 @@ cdef class OBIView_line_selection(list): return view +cdef class OBIView_line : + + def __init__(self, OBIView view, index_t line_nb) : + self._index = line_nb + self._view = view + + def __getitem__(self, str column_name) : + return ((self._view)._columns)[column_name][self._index] + + def __setitem__(self, str column_name, object value): + # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get) + # TODO OBI_QUAL ? + cdef type value_type + cdef str value_obitype + cdef bytes value_b + + if column_name not in self._view : + if value == None : + raise Exception("Trying to create a column from a None value (can't guess type)") + value_type = type(value) + if value_type == int : + value_obitype = 'OBI_INT' + elif value_type == float : + value_obitype = 'OBI_FLOAT' + elif value_type == bool : + value_obitype = 'OBI_BOOL' + elif value_type == str or value_type == bytes : + if value_type == str : + value_b = str2bytes(value) + else : + value_b = value + if only_ATGC(value_b) : # TODO detect IUPAC + value_obitype = 'OBI_SEQ' + elif len(value) == 1 : + value_obitype = 'OBI_CHAR' + elif (len(value) > 1) : + value_obitype = 'OBI_STR' + else : + raise Exception("Could not guess the type of a value to create a new column") + self._view.add_column(column_name, type=value_obitype) + + (((self._view)._columns)[column_name]).set_line(self._index, value) + + def __iter__(self): + for column_name in ((self._view)._columns) : + yield column_name + + def __contains__(self, str column_name): + return (column_name in self._view._columns) + + def __repr__(self): + cdef dict line + cdef str column_name + line = {} + for column_name in self._view._columns : + line[column_name] = self[column_name] + return str(line) + + +