From 0526386337d9f865e5a75d149f746d80780e8ecc Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 27 Dec 2016 06:17:45 +0100 Subject: [PATCH] first working DMS class --- python/obitools3/dms/__init__.py | 2 + python/obitools3/dms/capi/obidms.pxd | 4 + python/obitools3/dms/capi/obidmscolumn.pxd | 8 +- python/obitools3/dms/capi/obitypes.pxd | 1 + python/obitools3/dms/capi/obiview.pxd | 4 +- python/obitools3/dms/column.pxd | 38 +-- python/obitools3/dms/column.pyx | 67 +++-- python/obitools3/dms/dms.cfiles | 32 +++ python/obitools3/dms/dms.pxd | 19 +- python/obitools3/dms/dms.pyx | 58 +++-- python/obitools3/dms/view.pxd | 53 ++-- python/obitools3/dms/view.pyx | 280 ++++++++++++--------- python/obitools3/parsers/fasta.pyx | 62 ++++- python/obitools3/utils.pyx | 2 - 14 files changed, 371 insertions(+), 259 deletions(-) create mode 100644 python/obitools3/dms/dms.cfiles diff --git a/python/obitools3/dms/__init__.py b/python/obitools3/dms/__init__.py index e69de29..60bfaf1 100644 --- a/python/obitools3/dms/__init__.py +++ b/python/obitools3/dms/__init__.py @@ -0,0 +1,2 @@ +from .dms import DMS # @UnresolvedImport + diff --git a/python/obitools3/dms/capi/obidms.pxd b/python/obitools3/dms/capi/obidms.pxd index 9bb116f..5f30088 100644 --- a/python/obitools3/dms/capi/obidms.pxd +++ b/python/obitools3/dms/capi/obidms.pxd @@ -10,3 +10,7 @@ cdef extern from "obidms.h" nogil: OBIDMS_p obi_dms(const_char_p dms_name) int obi_close_dms(OBIDMS_p dms) + char* obi_dms_get_dms_path(OBIDMS_p dms) + char* obi_dms_get_full_path(OBIDMS_p dms, const_char_p path_name) + + diff --git a/python/obitools3/dms/capi/obidmscolumn.pxd b/python/obitools3/dms/capi/obidmscolumn.pxd index 2562dda..0bd0a5b 100644 --- a/python/obitools3/dms/capi/obidmscolumn.pxd +++ b/python/obitools3/dms/capi/obidmscolumn.pxd @@ -2,7 +2,7 @@ from ..capi.obidms cimport OBIDMS_p from ..capi.obitypes cimport const_char_p, \ - OBIType_t, \ + obitype_t, \ obiversion_t, \ obiint_t, \ obibool_t, \ @@ -29,8 +29,8 @@ cdef extern from "obidmscolumn.h" nogil: index_t lines_used index_t nb_elements_per_line const_char_p elements_names - OBIType_t returned_data_type - OBIType_t stored_data_type + obitype_t returned_data_type + obitype_t stored_data_type time_t creation_date obiversion_t version obiversion_t cloned_from @@ -50,7 +50,7 @@ cdef extern from "obidmscolumn.h" nogil: OBIDMS_column_p obi_create_column(OBIDMS_p dms, const_char_p column_name, - OBIType_t type, + obitype_t type, index_t nb_lines, index_t nb_elements_per_line, const_char_p elements_names, diff --git a/python/obitools3/dms/capi/obitypes.pxd b/python/obitools3/dms/capi/obitypes.pxd index d765629..efc2fb5 100644 --- a/python/obitools3/dms/capi/obitypes.pxd +++ b/python/obitools3/dms/capi/obitypes.pxd @@ -53,3 +53,4 @@ cdef extern from "obitypes.h" nogil: const_char_p name_data_type(int data_type) +ctypedef OBIType_t obitype_t diff --git a/python/obitools3/dms/capi/obiview.pxd b/python/obitools3/dms/capi/obiview.pxd index 34521ca..4abe361 100644 --- a/python/obitools3/dms/capi/obiview.pxd +++ b/python/obitools3/dms/capi/obiview.pxd @@ -1,7 +1,7 @@ #cython: language_level=3 from .obitypes cimport const_char_p, \ - OBIType_t, \ + obitype_t, \ obiversion_t, \ obiint_t, \ obibool_t, \ @@ -78,7 +78,7 @@ cdef extern from "obiview.h" nogil: const_char_p column_name, obiversion_t version_number, const_char_p alias, - OBIType_t data_type, + obitype_t data_type, index_t nb_lines, index_t nb_elements_per_line, char* elements_names, diff --git a/python/obitools3/dms/column.pxd b/python/obitools3/dms/column.pxd index 94aa986..cbdd86b 100644 --- a/python/obitools3/dms/column.pxd +++ b/python/obitools3/dms/column.pxd @@ -1,41 +1,25 @@ #cython: language_level=3 -from .capi.obidmscolumn cimport OBIDMS_column_p, \ - OBIDMS_column_header_p, \ - obi_close_column, \ - obi_column_prepare_to_get_value - -from .capi.obiview cimport Obiview_p, \ - obi_view_get_pointer_on_column_in_view - -from .capi.obitypes cimport obiversion_t, \ - OBIType_t, \ - index_t, \ - name_data_type - -from .capi.obiutils cimport obi_format_date - +from .capi.obitypes cimport index_t, \ + obitype_t +from .capi.obidmscolumn cimport OBIDMS_column_p + from .view cimport View cdef class Column: - cdef bytes _alias # associated property: alias - cdef OBIDMS_column_p* _pointer - cdef OBIView _view + cdef OBIDMS_column_p* _pointer + cdef View _view cpdef close(self) - @staticmethod - cdef type get_subclass_type(OBIDMS_column_p column_p) - cdef class Column_line: - cdef OBIDMS_column _column - cdef OBIDMS_column_p _column_p - cdef OBIDMS_column_p* _column_pp - cdef index_t _index - cdef int _len + cdef Column _column + cdef index_t _index cpdef update(self, data) -cdef register_column_class(OBIType_t obitype,type classe, type python) +cdef register_column_class(obitype_t obitype, + type classe, + type python) diff --git a/python/obitools3/dms/column.pyx b/python/obitools3/dms/column.pyx index e021d8f..77205ff 100644 --- a/python/obitools3/dms/column.pyx +++ b/python/obitools3/dms/column.pyx @@ -1,7 +1,17 @@ #cython: language_level=3 +from .capi.obitypes cimport name_data_type + +from .capi.obidmscolumn cimport OBIDMS_column_header_p, \ + obi_close_column, \ + obi_column_prepare_to_get_value + +from .capi.obiutils cimport obi_format_date + from .dms cimport __OBIDMS_COLUMN_CLASS__ +from obitools3.utils cimport bytes2str + cdef class Column : """ @@ -9,9 +19,9 @@ cdef class Column : """ # Note: should only be initialized through a subclass - def __init__(self, - View view, - object name): + def __init__(self, + View view, + int __internalCall__): ''' Create a new OBDMS column objected referring to a already created column in the context of a view. @@ -20,22 +30,19 @@ cdef class Column : @param view: The view object containing the column. @type view: OBIView - @param name: The name of the column in the view - @type name: a `str` or a `bytes` ''' cdef OBIDMS_column_p* column_pp + if __internalCall__!=987654: + raise RuntimeError('OBIView constructor cannot be called directly') + # Check that the class is only created as a subclass instance if type(self)==Column or not isinstance(self, Column): raise RuntimeError('OBIDMS.Column constructor cannot be called directly') - - column_pp = obi_view_get_pointer_on_column_in_view(view._pointer, - tobytes(column_alias)) - + # Fill structure - self._alias = column_alias - self._pointer = column_pp + self._pointer = NULL self._view = view def __len__(self): @@ -75,15 +82,16 @@ cdef class Column : ) cpdef close(self): - if obi_close_column((self._pointer)[0]) < 0 : - raise Exception("Problem closing a column") + if self._pointer != NULL: + if obi_close_column(self._pointer[0]) < 0 : + raise Exception("Problem closing column %s" % bytes2str(self.name)) # Column alias property getter and setter @property - def alias(self): + def name(self): return self._alias - @alias.setter - def alias(self, new_alias): # @DuplicatedSignature + @name.setter + def name(self, new_alias): # @DuplicatedSignature self._view.change_column_alias(self._alias, new_alias) # elements_names property getter @@ -126,38 +134,23 @@ cdef class Column : def creation_date(self): return obi_format_date((self._pointer)[0].header.creation_date) - @staticmethod - cdef type get_subclass_type(OBIDMS_column_p column_p) : - - cdef type subclass - cdef OBIType_t col_type - - col_type = column_p.header.returned_data_type - subclass = __OBIDMS_COLUMN_CLASS__[col_type] - - return subclass - - - ###################################################################################################### -cdef class OBIDMS_column_line : +cdef class Column_line : - def __init__(self, OBIDMS_column column, index_t line_nb) : + def __init__(self, Column column, index_t line_nb) : self._index = line_nb self._column = column - self._column_pp = column._pointer - self._column_p = NULL - self._len = self._column_pp[0].header.nb_elements_per_line - if obi_column_prepare_to_get_value(self._column_pp[0],line_nb) < 0: + if obi_column_prepare_to_get_value(self._column._pointer[0],line_nb) < 0: raise IndexError("Cannot access to the line %d" % line_nb) def __contains__(self, str element_name): - return (element_name in self._column.elements_names) + pass + #return (element_name in self._column.elements_names) def __repr__(self) : return str(self._column.get_line(self._index)) @@ -173,7 +166,7 @@ cdef class OBIDMS_column_line : ###################################################################################################### -cdef register_column_class(OBIType_t obitype, +cdef register_column_class(obitype_t obitype, type classe, type python): """ diff --git a/python/obitools3/dms/dms.cfiles b/python/obitools3/dms/dms.cfiles new file mode 100644 index 0000000..f541dda --- /dev/null +++ b/python/obitools3/dms/dms.cfiles @@ -0,0 +1,32 @@ +../../../src/bloom.c +../../../src/char_str_indexer.c +../../../src/crc64.c +../../../src/dna_seq_indexer.c +../../../src/encode.c +../../../src/hashtable.c +../../../src/murmurhash2.c +../../../src/obi_align.c +../../../src/obiavl.c +../../../src/obiblob_indexer.c +../../../src/obiblob.c +../../../src/obidms_taxonomy.c +../../../src/obidms.c +../../../src/obidmscolumn_blob.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn.c +../../../src/obidmscolumndir.c +../../../src/obierrno.c +../../../src/obilittlebigman.c +../../../src/obitypes.c +../../../src/obiview.c +../../../src/sse_banded_LCS_alignment.c +../../../src/uint8_indexer.c +../../../src/upperband.c +../../../src/utils.c diff --git a/python/obitools3/dms/dms.pxd b/python/obitools3/dms/dms.pxd index 167e692..cc11ef2 100644 --- a/python/obitools3/dms/dms.pxd +++ b/python/obitools3/dms/dms.pxd @@ -1,20 +1,25 @@ #cython: language_level=3 from .capi.obidms cimport OBIDMS_p -from .capi.obidmscolumn cimport OBIDMS_column_p -from .capi.obiview cimport Obiview_p -from .capi.obitypes cimport obiversion_t, OBIType_t, index_t -from ._obitaxo cimport OBI_Taxonomy - +from .capi.obitypes cimport obiversion_t, \ + obitype_t, \ + index_t +cdef dict __OBIDMS_COLUMN_CLASS__ +cdef dict __OBIDMS_VIEW_CLASS__ cdef class DMS: cdef OBIDMS_p _pointer + @staticmethod + cdef type get_column_class(obitype_t obitype) + + @staticmethod + cdef type get_python_type(obitype_t obitype) + cpdef close(self) - cpdef Taxonomy open_taxonomy(self, str taxo_name) - cpdef dict read_view_infos(self, objec view_name) + cpdef int view_count(self) # cpdef dict read_views(self) TODO diff --git a/python/obitools3/dms/dms.pyx b/python/obitools3/dms/dms.pyx index 347e5f8..514c4ea 100644 --- a/python/obitools3/dms/dms.pyx +++ b/python/obitools3/dms/dms.pyx @@ -1,9 +1,10 @@ #cython: language_level=3 -from libc.stdlib cimport malloc +from libc.stdlib cimport malloc,free from .capi.obidms cimport obi_dms, \ - obi_close_dms + obi_close_dms, \ + obi_dms_get_full_path from .capi.obidmscolumn cimport obi_close_column, \ OBIDMS_column_p, \ @@ -18,26 +19,18 @@ from .capi.obiview cimport Obiview_p, \ obi_view_unmap_file from .capi.obitypes cimport const_char_p, \ - OBIType_t, \ - OBI_INT, \ - OBI_FLOAT, \ - OBI_BOOL, \ - OBI_CHAR, \ - OBI_QUAL, \ - OBI_STR, \ - OBI_SEQ, \ name_data_type, \ only_ATGC # discuss from obitools3.utils cimport bytes2str, \ str2bytes, \ - tobytes + tobytes, \ + tostr - -from .taxonomy cimport Taxonomy - +from pathlib import Path __OBIDMS_COLUMN_CLASS__ = {} +__OBIDMS_VIEW_CLASS__= {} cdef class DMS : @@ -66,7 +59,7 @@ cdef class DMS : ''' # Declarations - cdef bytes dms_name_b = tobytes(dms_name): + cdef bytes dms_name_b = tobytes(dms_name) # Fill structure and create or open the DMS self._pointer = obi_dms( dms_name_b) @@ -79,7 +72,7 @@ cdef class DMS : """ Destructor of the DMS instance. - The destructor automatically call the `close` methode and + The destructor automatically call the `close` method and therefore free all the associated memory. """ @@ -91,8 +84,10 @@ cdef class DMS : def name(self): """ Returns the name of the DMS instance + + @rtype: bytes """ - return bytes2str(self._pointer.dms_name) + return self._pointer.dms_name cpdef close(self) : """ @@ -100,9 +95,36 @@ cdef class DMS : the `close` method is automatically called by the object destructor. """ + if (obi_close_dms(self._pointer)) < 0 : raise Exception("Problem closing an OBIDMS") + def keys(self): + cdef const_char_p path = obi_dms_get_full_path(self._pointer, + b"VIEWS" + ) + + if path==NULL: + raise RuntimeError("Cannot retreive the Dataabase path") + + p = Path(bytes2str(path)) + + free(path) + + for v in p.glob("*.obiview"): + yield str2bytes(v.stem) + + def __contains__(self,key): + cdef str key_s = tostr(key) + cdef const_char_p path = obi_dms_get_full_path(self._pointer, + b"VIEWS" + ) + p = Path(bytes2str(path),key_s) - + free(path) + + return p.with_suffix(".obiview").is_file() + + cpdef int view_count(self): + return PyList_Size(list(self.keys())) \ No newline at end of file diff --git a/python/obitools3/dms/view.pxd b/python/obitools3/dms/view.pxd index e0c09ba..59dcff3 100644 --- a/python/obitools3/dms/view.pxd +++ b/python/obitools3/dms/view.pxd @@ -1,44 +1,18 @@ #cython: language_level=3 -from .capi.obitypes cimport obiversion_t, \ - index_t, \ - only_ATGC - -from .capi.obiview cimport Obiview_p, \ - Obiview_infos_p, \ - Alias_column_pair_p, \ - obi_new_view_nuc_seqs, \ - obi_new_view, \ - obi_new_view_cloned_from_name, \ - obi_new_view_nuc_seqs_cloned_from_name, \ - obi_view_map_file, \ - obi_view_unmap_file, \ - obi_open_view, \ - obi_view_delete_column, \ - obi_view_add_column, \ - obi_view_create_column_alias, \ - obi_view_get_column, \ - obi_view_get_pointer_on_column_in_view, \ - obi_save_and_close_view, \ - VIEW_TYPE_NUC_SEQS, \ - NUC_SEQUENCE_COLUMN, \ - ID_COLUMN, \ - DEFINITION_COLUMN, \ - QUALITY_COLUMN - -from obitools3.utils cimport tobytes, \ - bytes2str, \ - tostr - +from .capi.obiview cimport Obiview_p +from .capi.obitypes cimport index_t, \ + obitype_t + from .dms cimport DMS +from .column cimport Column + + cdef class View: cdef DMS _dms cdef Obiview_p _pointer - cdef dict _columns - - cdef __init_columns__(self) cpdef View clone(self, object view_name, @@ -55,17 +29,18 @@ cdef class View: cpdef close(self) + cpdef get_column(self, + object column_name) + cpdef delete_column(self, object column_name) cpdef rename_column(self, object current_name, object new_name) - - cdef update_column_pointers(self) - - cpdef OBIView_line_selection new_selection(self, - list lines=*) + + cpdef View_line_selection new_selection(self, + list lines=*) cdef class View_line_selection(list): @@ -80,6 +55,6 @@ cdef class View_line_selection(list): cdef class View_line : cdef index_t _index - cdef View _view + cdef View _view diff --git a/python/obitools3/dms/view.pyx b/python/obitools3/dms/view.pyx index ca74570..aa860d8 100644 --- a/python/obitools3/dms/view.pyx +++ b/python/obitools3/dms/view.pyx @@ -1,5 +1,19 @@ #cython: language_level=3 +from libc.stdlib cimport malloc + +from .capi.obiview cimport obi_new_view, \ + obi_open_view, \ + obi_save_and_close_view, \ + obi_view_get_pointer_on_column_in_view, \ + obi_view_delete_column, \ + obi_view_create_column_alias + +from .capi.obidmscolumn cimport OBIDMS_column_p + +from obitools3.utils cimport tobytes, \ + bytes2str + cdef class View : @@ -46,8 +60,8 @@ cdef class View : bytes view_name, bytes comments=b""): - cdef View view = OBIView(dms, - 987654) # @DuplicatedSignature + cdef View view = View(dms, + 987654) # @DuplicatedSignature view._pointer = obi_new_view(dms._pointer, view_name, @@ -61,7 +75,7 @@ cdef class View : return view @staticmethod - def new(OBIDMS dms, + def new(DMS dms, object view_name, object comments=None): @@ -104,35 +118,65 @@ cdef class View : cpdef close(self): if (obi_save_and_close_view(self._pointer) < 0) : - raise Exception("Problem closing a view") + raise Exception("Problem closing view %s" % + bytes2str(self.name)) def __dealloc__(self): self.close() def __repr__(self) : - cdef str s - s = str(self.name) + "\n" + str(self.comments) + "\n" + str(self.line_count) + " lines\n" - for column_name in self._columns : - s = s + repr(self._columns[column_name]) + '\n' + cdef str s = "{name:s}\n{comments:s}\n{line_count:d} lines\n".format(name = str(self.name), + comments = str(self.comments), + line_count = self.line_count) + + #for column_name in self._columns : + # s = s + repr(self._columns[column_name]) + '\n' + return s + cpdef get_column(self, + object column_name): + cdef bytes column_name_b = tobytes(column_name) + cdef OBIDMS_column_p* column_pp + cdef OBIDMS_column_p column_p + cdef Column column + cdef obitype_t column_type + + column_pp = obi_view_get_pointer_on_column_in_view(self._pointer, + column_name_b) + + if column_pp == NULL: + raise KeyError("Cannot access to column %s in view %s" % ( + bytes2str(column_name_b), + bytes2str(self.name) + )) + + column_p = column_pp[0] + column_type = column_p.header.returned_data_type + + column = DMS.get_column_class(column_type)(self) + column._pointer = column_pp + + return column cpdef delete_column(self, object column_name) : - if obi_view_delete_column(self._pointer, tobytes(column_name)) < 0 : + + cdef bytes column_name_b = tobytes(column_name) + + if obi_view_delete_column(self._pointer, column_name_b) < 0 : raise Exception("Problem deleting column %s from a view", - tostr(column_name)) - # Update the dictionary of column objects: - self.update_column_pointers() + bytes2str(column_name_b)) + cpdef rename_column(self, object current_name, object new_name): - cdef OBIDMS_column column + cdef Column column cdef bytes current_name_b = tobytes(current_name) - cdef bytes new_name_b = tobyes(new_name) + cdef bytes new_name_b = tobytes(new_name) if (obi_view_create_column_alias(self._pointer, tobytes(current_name_b), @@ -142,15 +186,15 @@ cdef class View : bytes2str(new_name_b))) - cpdef OBIView_line_selection new_selection(self,list lines=None): - return OBIView_line_selection(self,lines) + cpdef View_line_selection new_selection(self,list lines=None): + return View_line_selection(self,lines) def __iter__(self): # Iteration on each line of all columns # Declarations cdef index_t line_nb - cdef OBIView_line line + cdef View_line line # Yield each line for line_nb in range(self.line_count) : @@ -162,7 +206,7 @@ cdef class View : if type(item) == str : return (self._columns)[item] elif type(item) == int : - return OBIView_line(self, item) + return View_line(self, item) def __contains__(self, str column_name): @@ -174,7 +218,7 @@ cdef class View : def __str__(self) : - cdef OBIView_line line + cdef View_line line cdef str to_print to_print = "" for line in self : @@ -194,28 +238,23 @@ cdef class View : # name property getter @property def name(self): - return bytes2str(self._pointer.infos.name) + return self._pointer.infos.name # view type property getter @property def type(self): # @ReservedAssignment return bytes2str(self._pointer.infos.view_type) - # columns property getter - @property - def columns(self): - return self._columns - # comments property getter @property def comments(self): - return bytes2str(self._pointer.infos.comments) + return self._pointer.infos.comments # TODO setter that concatenates new comments? -cdef class OBIView_line_selection(list): +cdef class View_line_selection(list): - def __init__(self, OBIView view, lines=None) : + def __init__(self, View view, lines=None) : if view._pointer == NULL: raise Exception("Error: trying to create a line selection with an invalidated view") self._view = view @@ -239,10 +278,10 @@ cdef class OBIView_line_selection(list): def append(self, index_t idx) : if idx >= self._view.line_count : - raise RuntimeError("Error: trying to select line %d beyond the line count %d of view %s" % - (i, - max_i, - self._view_name) + raise IndexError("Error: trying to select line %d beyond the line count %d of view %s" % + (idx, + self._view.line_count, + bytes2str(self.name)) ) list.append(self,idx) @@ -253,22 +292,23 @@ cdef class OBIView_line_selection(list): line_selection_p = malloc((l_selection + 1) * sizeof(index_t)) # +1 for the -1 flagging the end of the array for i in range(l_selection) : - line_selection_p[i] = line_selection[i] + line_selection_p[i] = self[i] line_selection_p[l_selection] = -1 # flagging the end of the array return line_selection_p - cpdef OBIView materialize(self, - str view_name, - str comments=""): + cpdef View materialize(self, + object view_name, + object comments=""): - cdef OBIView view = OBIView(987654) - + cdef View view = View(987654) + cdef bytes view_name_b=tobytes(view_name) + view._pointer = obi_new_view(self._view._pointer.dms, - str2bytes(view_name), + view_name_b, self._view._pointer, self.__build_binary_list__(), - str2bytes(comments)) + tobytes(comments)) if view._pointer == NULL : raise RuntimeError("Error : Cannot clone view %s into view %s" @@ -276,104 +316,102 @@ cdef class OBIView_line_selection(list): view_name) ) - view.__init_columns__() - return view -cdef class OBIView_line : +cdef class View_line : - def __init__(self, OBIView view, index_t line_nb) : + def __init__(self, View view, index_t line_nb) : self._index = line_nb self._view = view def __getitem__(self, str column_name) : return ((self._view)._columns)[column_name][self._index] - def __setitem__(self, str column_name, object value): - # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get) - # TODO OBI_QUAL ? - cdef type value_type - cdef str value_obitype - cdef bytes value_b - - if column_name not in self._view : - if value == None : - raise Exception("Trying to create a column from a None value (can't guess type)") - value_type = type(value) - if value_type == int : - value_obitype = 'OBI_INT' - elif value_type == float : - value_obitype = 'OBI_FLOAT' - elif value_type == bool : - value_obitype = 'OBI_BOOL' - elif value_type == str or value_type == bytes : - if value_type == str : - value_b = str2bytes(value) - else : - value_b = value - if only_ATGC(value_b) : # TODO detect IUPAC - value_obitype = 'OBI_SEQ' - elif len(value) == 1 : - value_obitype = 'OBI_CHAR' - elif (len(value) > 1) : - value_obitype = 'OBI_STR' - else : - raise Exception("Could not guess the type of a value to create a new column") - self._view.add_column(column_name, type=value_obitype) - - (((self._view)._columns)[column_name]).set_line(self._index, value) - - def __iter__(self): - for column_name in ((self._view)._columns) : - yield column_name - - def __contains__(self, str column_name): - return (column_name in self._view._columns) +# def __setitem__(self, str column_name, object value): +# # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get) +# # TODO OBI_QUAL ? +# cdef type value_type +# cdef str value_obitype +# cdef bytes value_b +# +# if column_name not in self._view : +# if value == None : +# raise Exception("Trying to create a column from a None value (can't guess type)") +# value_type = type(value) +# if value_type == int : +# value_obitype = 'OBI_INT' +# elif value_type == float : +# value_obitype = 'OBI_FLOAT' +# elif value_type == bool : +# value_obitype = 'OBI_BOOL' +# elif value_type == str or value_type == bytes : +# if value_type == str : +# value_b = str2bytes(value) +# else : +# value_b = value +# if only_ATGC(value_b) : # TODO detect IUPAC +# value_obitype = 'OBI_SEQ' +# elif len(value) == 1 : +# value_obitype = 'OBI_CHAR' +# elif (len(value) > 1) : +# value_obitype = 'OBI_STR' +# else : +# raise Exception("Could not guess the type of a value to create a new column") +# self._view.add_column(column_name, type=value_obitype) +# +# (((self._view)._columns)[column_name]).set_line(self._index, value) +# +# def __iter__(self): +# for column_name in ((self._view)._columns) : +# yield column_name +# +# def __contains__(self, str column_name): +# return (column_name in self._view._columns) def __repr__(self): cdef dict line cdef str column_name line = {} - for column_name in self._view._columns : - line[column_name] = self[column_name] +# for column_name in self._view._columns : +# line[column_name] = self[column_name] return str(line) - cpdef dict get_view_infos(self, str view_name) : - - cdef Obiview_infos_p view_infos_p - cdef dict view_infos_d - cdef Alias_column_pair_p column_refs - cdef int i, j - cdef str column_name - - view_infos_p = obi_view_map_file(self._pointer, - tobytes(view_name)) - view_infos_d = {} - view_infos_d["name"] = bytes2str(view_infos_p.name) - view_infos_d["comments"] = bytes2str(view_infos_p.comments) - view_infos_d["view_type"] = bytes2str(view_infos_p.view_type) - view_infos_d["column_count"] = view_infos_p.column_count - view_infos_d["line_count"] = view_infos_p.line_count - view_infos_d["created_from"] = bytes2str(view_infos_p.created_from) - view_infos_d["creation_date"] = bytes2str(obi_format_date(view_infos_p.creation_date)) - if (view_infos_p.all_lines) : - view_infos_d["line_selection"] = None - else : - view_infos_d["line_selection"] = {} - view_infos_d["line_selection"]["column_name"] = bytes2str((view_infos_p.line_selection).column_name) - view_infos_d["line_selection"]["version"] = (view_infos_p.line_selection).version - view_infos_d["column_references"] = {} - column_references = view_infos_p.column_references - for j in range(view_infos_d["column_count"]) : - column_name = bytes2str((column_references[j]).alias) - view_infos_d["column_references"][column_name] = {} - view_infos_d["column_references"][column_name]["original_name"] = bytes2str((column_references[j]).column_refs.column_name) - view_infos_d["column_references"][column_name]["version"] = (column_references[j]).column_refs.version - - obi_view_unmap_file(self._pointer, view_infos_p) - - return view_infos_d +# cpdef dict get_view_infos(self, str view_name) : +# +# cdef Obiview_infos_p view_infos_p +# cdef dict view_infos_d +# cdef Alias_column_pair_p column_refs +# cdef int i, j +# cdef str column_name +# +# view_infos_p = obi_view_map_file(self._pointer, +# tobytes(view_name)) +# view_infos_d = {} +# view_infos_d["name"] = bytes2str(view_infos_p.name) +# view_infos_d["comments"] = bytes2str(view_infos_p.comments) +# view_infos_d["view_type"] = bytes2str(view_infos_p.view_type) +# view_infos_d["column_count"] = view_infos_p.column_count +# view_infos_d["line_count"] = view_infos_p.line_count +# view_infos_d["created_from"] = bytes2str(view_infos_p.created_from) +# view_infos_d["creation_date"] = bytes2str(obi_format_date(view_infos_p.creation_date)) +# if (view_infos_p.all_lines) : +# view_infos_d["line_selection"] = None +# else : +# view_infos_d["line_selection"] = {} +# view_infos_d["line_selection"]["column_name"] = bytes2str((view_infos_p.line_selection).column_name) +# view_infos_d["line_selection"]["version"] = (view_infos_p.line_selection).version +# view_infos_d["column_references"] = {} +# column_references = view_infos_p.column_references +# for j in range(view_infos_d["column_count"]) : +# column_name = bytes2str((column_references[j]).alias) +# view_infos_d["column_references"][column_name] = {} +# view_infos_d["column_references"][column_name]["original_name"] = bytes2str((column_references[j]).column_refs.column_name) +# view_infos_d["column_references"][column_name]["version"] = (column_references[j]).column_refs.version +# +# obi_view_unmap_file(self._pointer, view_infos_p) +# +# return view_infos_d diff --git a/python/obitools3/parsers/fasta.pyx b/python/obitools3/parsers/fasta.pyx index c788414..685b865 100644 --- a/python/obitools3/parsers/fasta.pyx +++ b/python/obitools3/parsers/fasta.pyx @@ -6,8 +6,12 @@ Created on 30 mars 2016 @author: coissac ''' +#from obitools3.dms._obiseq cimport OBI_Seq -def fastaIterator(lineiterator, int buffersize=100000000): + +def fastaIterator(lineiterator, + int buffersize=100000000 + ): cdef LineBuffer lb cdef str ident cdef str definition @@ -15,6 +19,7 @@ def fastaIterator(lineiterator, int buffersize=100000000): cdef list s cdef bytes sequence cdef bytes quality +# cdef OBI_Seq seq if isinstance(lineiterator,(str,bytes)): lineiterator=uopen(lineiterator) @@ -41,7 +46,60 @@ def fastaIterator(lineiterator, int buffersize=100000000): sequence = b"".join(s) quality = None - + + +# seq = OBI_Seq(id, +# sequence, +# definition, +# tags=tags, +# ) + yield { "id" : ident, + "definition" : definition, + "sequence" : sequence, + "quality" : quality, + "tags" : tags, + "annotation" : {} + } + + +def fastaNucIterator(lineiterator, int buffersize=100000000): + cdef LineBuffer lb + cdef str ident + cdef str definition + cdef dict tags + cdef list s + cdef bytes sequence + cdef bytes quality +# cdef OBI_Seq seq + + if isinstance(lineiterator,(str,bytes)): + lineiterator=uopen(lineiterator) + + if isinstance(lineiterator, LineBuffer): + lb=lineiterator + else: + lb=LineBuffer(lineiterator,buffersize) + + i = iter(lb) + line = next(i) + + while True: + ident,tags,definition = parseHeader(line) + s = [] + line = next(i) + + try: + while line[0]!='>': + s.append(str2bytes(line)[0:-1]) + line = next(i) + except StopIteration: + pass + + sequence = b"".join(s) + quality = None + + +# seq = yield { "id" : ident, "definition" : definition, "sequence" : sequence, diff --git a/python/obitools3/utils.pyx b/python/obitools3/utils.pyx index ea9eb1c..b7e4399 100644 --- a/python/obitools3/utils.pyx +++ b/python/obitools3/utils.pyx @@ -1,6 +1,4 @@ #cython: language_level=3 -from IPython.utils._tokenize_py2 import String - cdef bytes str2bytes(str string): """