2015-05-27 15:05:14 +02:00
#cython: language_level=3
2015-05-26 14:17:08 +02:00
2015-09-28 13:51:35 +02:00
from obitools3.utils cimport bytes2str , str2bytes
2015-10-08 10:44:13 +02:00
from .capi.obidms cimport obi_dms , \
obi_close_dms
2016-02-18 10:38:51 +01:00
2016-04-14 15:13:30 +02:00
from .capi.obidmscolumn cimport obi_close_column , \
2016-02-18 10:38:51 +01:00
OBIDMS_column_p , \
2015-10-14 18:05:34 +02:00
OBIDMS_column_header_p
2016-04-25 18:35:02 +02:00
from .capi.obiutils cimport obi_format_date
2016-08-10 14:51:02 +02:00
from .capi.obialign cimport obi_align_one_column
2015-10-14 18:05:34 +02:00
from .capi.obitypes cimport const_char_p , \
2015-11-23 15:48:27 +01:00
OBIType_t , \
OBI_INT , \
OBI_FLOAT , \
OBI_BOOL , \
OBI_CHAR , \
2016-05-20 16:45:22 +02:00
OBI_QUAL , \
2015-11-23 15:48:27 +01:00
OBI_STR , \
OBI_SEQ , \
2016-02-18 10:38:51 +01:00
name_data_type , \
only_ATGC # discuss
2015-10-14 18:05:34 +02:00
2016-02-29 16:33:30 +01:00
from ._obidms cimport OBIDMS , \
OBIDMS_column , \
OBIView , \
OBIView_line
2016-03-11 13:56:38 +01:00
from ._obitaxo cimport OBI_Taxonomy
2016-02-29 16:33:30 +01:00
from ._obiseq cimport OBI_Nuc_Seq , OBI_Nuc_Seq_Stored
2015-09-28 13:51:35 +02:00
from ._obidmscolumn_int cimport OBIDMS_column_int , \
2016-02-18 10:38:51 +01:00
OBIDMS_column_multi_elts_int
2015-10-15 15:12:45 +02:00
2015-09-28 13:51:35 +02:00
from ._obidmscolumn_float cimport OBIDMS_column_float , \
2016-02-18 10:38:51 +01:00
OBIDMS_column_multi_elts_float
2015-09-28 13:51:35 +02:00
from ._obidmscolumn_bool cimport OBIDMS_column_bool , \
2016-02-18 10:38:51 +01:00
OBIDMS_column_multi_elts_bool
2015-09-28 13:51:35 +02:00
from ._obidmscolumn_char cimport OBIDMS_column_char , \
2016-02-18 10:38:51 +01:00
OBIDMS_column_multi_elts_char
2015-09-28 13:51:35 +02:00
2016-06-09 15:54:36 +02:00
from ._obidmscolumn_qual cimport OBIDMS_column_qual , \
OBIDMS_column_multi_elts_qual
2016-05-20 16:45:22 +02:00
2015-11-03 14:22:00 +01:00
from ._obidmscolumn_str cimport OBIDMS_column_str , \
2016-02-18 10:38:51 +01:00
OBIDMS_column_multi_elts_str
2015-09-17 11:23:50 +02:00
2015-11-19 18:12:48 +01:00
from ._obidmscolumn_seq cimport OBIDMS_column_seq , \
2016-02-18 10:38:51 +01:00
OBIDMS_column_multi_elts_seq
from .capi.obiview cimport Obiview_p , \
2016-02-25 09:43:27 +01:00
Obiview_infos_p , \
2016-08-01 18:25:30 +02:00
Alias_column_pair_p , \
2016-02-25 09:43:27 +01:00
obi_new_view_nuc_seqs , \
2016-02-18 10:38:51 +01:00
obi_new_view , \
obi_new_view_cloned_from_name , \
2016-02-25 09:43:27 +01:00
obi_new_view_nuc_seqs_cloned_from_name , \
2016-06-30 11:41:30 +02:00
obi_view_map_file , \
obi_view_unmap_file , \
2016-02-18 10:38:51 +01:00
obi_open_view , \
obi_view_delete_column , \
obi_view_add_column , \
2016-08-01 18:25:30 +02:00
obi_view_create_column_alias , \
2016-05-11 16:36:23 +02:00
obi_view_get_column , \
2016-02-18 10:38:51 +01:00
obi_view_get_pointer_on_column_in_view , \
2016-02-25 09:43:27 +01:00
obi_save_and_close_view , \
VIEW_TYPE_NUC_SEQS , \
NUC_SEQUENCE_COLUMN , \
ID_COLUMN , \
2016-05-20 16:45:22 +02:00
DEFINITION_COLUMN , \
QUALITY_COLUMN
2016-02-18 10:38:51 +01:00
from libc.stdlib cimport malloc
2015-11-19 18:12:48 +01:00
2015-09-15 17:09:31 +02:00
2016-02-18 10:38:51 +01:00
cdef class OBIDMS_column:
    """Python handle on one column of a view.

    The instance stores the alias of the column in the view, the owning
    view, and a pointer on the view's slot holding the C column pointer
    (so header fields are always read through ``self._pointer[0]``).

    Note: should only be initialized through a subclass; line access is
    delegated to ``get_line`` / ``set_line``, which are not defined in
    this class body.
    """

    def __init__(self, OBIView view, str column_alias):
        # Look up the column slot of the view from the column alias.
        cdef OBIDMS_column_p* column_pp

        column_pp = obi_view_get_pointer_on_column_in_view(view._pointer, str2bytes(column_alias))
        # Every accessor below dereferences this pointer, so refuse to build
        # an instance around NULL rather than crash later.
        # NOTE(review): assumes the C function reports failure by returning NULL - confirm.
        if column_pp == NULL:
            raise Exception("Problem getting the pointer on a column in a view")

        # Fill structure
        self._alias = column_alias
        self._pointer = column_pp
        self._view = view

    def __setitem__(self, index_t line_nb, object value):
        self.set_line(line_nb, value)

    def __getitem__(self, index_t line_nb):
        return self.get_line(line_nb)

    def __len__(self):
        return self.lines_used

    def __sizeof__(self):
        # Header size plus data size, as recorded in the column header
        return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size)

    def __iter__(self):
        # Declarations
        cdef index_t line_nb

        # Yield each line
        for line_nb in range(self.lines_used):
            yield self.get_line(line_nb)

    def __str__(self):
        # One str() per line, each followed by the separator; built with a
        # single join instead of repeated concatenation.
        return ' ' + ''.join([str(line) + " \n " for line in self])

    def __repr__(self):
        return (self._alias + " , original name: " + self.original_name + " , version " + str(self.version) + " , data type: " + self.data_type)

    cpdef close(self):
        """Close the underlying C column; raise if the C call reports an error."""
        if obi_close_column((self._pointer)[0]) < 0:
            raise Exception(" Problem closing a column ")

    # Column alias property getter and setter
    @property
    def alias(self):
        return self._alias

    @alias.setter
    def alias(self, new_alias):
        # The view performs the renaming and updates this object's alias
        self._view.change_column_alias(self._alias, new_alias)

    # elements_names property getter
    @property
    def elements_names(self):
        return (bytes2str(((self._pointer)[0].header).elements_names)).split(' ; ')

    # nb_elements_per_line property getter
    @property
    def nb_elements_per_line(self):
        return ((self._pointer)[0].header).nb_elements_per_line

    # data_type property getter
    @property
    def data_type(self):
        return bytes2str(name_data_type(((self._pointer)[0].header).returned_data_type))

    # original_name property getter
    @property
    def original_name(self):
        return bytes2str(((self._pointer)[0].header).name)

    # version property getter
    @property
    def version(self):
        return ((self._pointer)[0].header).version

    # lines_used property getter
    @property
    def lines_used(self):
        return (self._pointer)[0].header.lines_used

    # comments property getter
    @property
    def comments(self):
        return bytes2str((self._pointer)[0].header.comments)

    # creation_date property getter
    @property
    def creation_date(self):
        return bytes2str(obi_format_date((self._pointer)[0].header.creation_date))

    @staticmethod
    cdef object get_subclass_type(OBIDMS_column_p column_p):
        # Return the Python column class matching the returned data type of
        # the C column and whether it stores one or several elements per line.
        # Raises an Exception if the data type is not one of the handled types.
        cdef object single_elt_class
        cdef object multi_elts_class
        cdef OBIDMS_column_header_p header
        cdef OBIType_t col_type

        header = column_p.header
        col_type = header.returned_data_type

        if col_type == OBI_INT:
            single_elt_class = OBIDMS_column_int
            multi_elts_class = OBIDMS_column_multi_elts_int
        elif col_type == OBI_FLOAT:
            single_elt_class = OBIDMS_column_float
            multi_elts_class = OBIDMS_column_multi_elts_float
        elif col_type == OBI_BOOL:
            single_elt_class = OBIDMS_column_bool
            multi_elts_class = OBIDMS_column_multi_elts_bool
        elif col_type == OBI_CHAR:
            single_elt_class = OBIDMS_column_char
            multi_elts_class = OBIDMS_column_multi_elts_char
        elif col_type == OBI_QUAL:
            single_elt_class = OBIDMS_column_qual
            multi_elts_class = OBIDMS_column_multi_elts_qual
        elif col_type == OBI_STR:
            single_elt_class = OBIDMS_column_str
            multi_elts_class = OBIDMS_column_multi_elts_str
        elif col_type == OBI_SEQ:
            single_elt_class = OBIDMS_column_seq
            multi_elts_class = OBIDMS_column_multi_elts_seq
        else:
            raise Exception(" Problem with the data type ")

        if header.nb_elements_per_line == 1:
            return single_elt_class
        return multi_elts_class
######################################################################################################
2016-09-28 14:26:23 +02:00
2016-02-18 10:38:51 +01:00
cdef class OBIDMS_column_multi_elts(OBIDMS_column):
    """Column variant whose lines are accessed element by element."""

    def __getitem__(self, index_t line_nb):
        # Hand back a line proxy bound to this column and line number
        line_proxy = OBIDMS_column_line(self, line_nb)
        return line_proxy

    cpdef set_line(self, index_t line_nb, dict values):
        """Set each element named in ``values`` on line ``line_nb`` through ``set_item``."""
        for elt_name in values:
            elt_value = values[elt_name]
            self.set_item(line_nb, elt_name, elt_value)
2016-09-28 14:26:23 +02:00
2016-02-18 10:38:51 +01:00
######################################################################################################
2016-09-28 14:26:23 +02:00
2016-02-18 10:38:51 +01:00
cdef class OBIDMS_column_line:
    """Proxy on one line of a column, indexed by element name."""

    def __init__(self, OBIDMS_column column, index_t line_nb):
        # Remember which column and which line this proxy stands for
        self._column = column
        self._index = line_nb

    def __getitem__(self, str element_name):
        # Read one element of the line through the column
        element = self._column.get_item(self._index, element_name)
        return element

    def __setitem__(self, str element_name, object value):
        # Write one element of the line through the column
        self._column.set_item(self._index, element_name, value)

    def __contains__(self, str element_name):
        # True if the column declares an element with that name
        known_names = self._column.elements_names
        return element_name in known_names

    def __repr__(self):
        # String form of the whole line as returned by the column
        whole_line = self._column.get_line(self._index)
        return str(whole_line)
2016-02-18 10:38:51 +01:00
2016-09-28 14:26:23 +02:00
######################################################################################################
2016-02-18 10:38:51 +01:00
cdef class OBIView:
    """Python handle on a view: a C view pointer plus a dictionary mapping
    column aliases to their Python column objects.
    """

    def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, OBIView_line_selection line_selection=None, str comments=" ", bint quality_column=False, str view_type=None):
        # Open an existing view or create a new one (``new=True``), then wrap
        # each of its columns in the matching Python column class.

        cdef index_t* line_selection_p = NULL
        cdef int i
        cdef str col_alias
        cdef OBIDMS_column_p column_p
        cdef object subclass

        if line_selection is not None:
            # Get the name of the associated view to clone
            view_to_clone = line_selection._view_name  # TODO discuss. This makes it possible for the view to clone to be closed. If a view to clone was given it is not checked.
            # Build the C array corresponding to the line selection
            line_selection_p = <index_t*> malloc((len(line_selection) + 1) * sizeof(index_t))  # +1 for the -1 flagging the end of the array
            # The array is written to right below: fail cleanly if the
            # allocation did not succeed.
            if line_selection_p == NULL:
                raise MemoryError()
            for i in range(len(line_selection)):
                line_selection_p[i] = line_selection[i]
            line_selection_p[len(line_selection)] = -1  # flagging the end of the array
            # NOTE(review): this buffer is never freed in this class; whether the
            # C side keeps the pointer is not visible from here - confirm and free if not.

        self._pointer = self._open_or_create_view(dms, view_name, new=new, view_to_clone=view_to_clone, line_selection_p=line_selection_p, comments=comments, quality_column=quality_column, view_type=view_type)

        # The pointer is dereferenced just below. Depending on how exceptions
        # raised in the cdef helper are propagated, a NULL could reach this
        # point, so check it explicitly.
        if self._pointer == NULL:
            raise Exception(" Error creating/opening a view ")

        # Go through columns to build dictionaries of corresponding python instances  # TODO make function?
        self._columns = {}
        for i in range(self._pointer.infos.column_count):
            col_alias = bytes2str(self._pointer.infos.column_references[i].alias)
            column_p = <OBIDMS_column_p> (self._pointer.columns)[i]
            subclass = OBIDMS_column.get_subclass_type(column_p)
            self._columns[col_alias] = subclass(self, col_alias)

    cdef Obiview_p _open_or_create_view(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, index_t* line_selection_p=NULL, str comments=" ", bint quality_column=False, str view_type=None):
        # Call the C function matching the request:
        #  - new view, no type: generic view, optionally cloned from a view
        #    given by name (str) or as an OBIView instance;
        #  - new view of the nucleotide sequences type: same choices with the
        #    dedicated C functions;
        #  - otherwise: open the existing view called view_name.
        # Raises an Exception for an unknown view type or a NULL result.
        cdef Obiview_p view = NULL

        # Create the view if needed, with the right type
        if new:
            if view_type is None:
                if view_to_clone is not None:
                    if type(view_to_clone) == str:
                        view = obi_new_view_cloned_from_name(dms._pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments))
                    else:
                        view = obi_new_view(dms._pointer, str2bytes(view_name), (<OBIView> view_to_clone)._pointer, line_selection_p, str2bytes(comments))
                else:
                    view = obi_new_view(dms._pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments))
            elif view_type == bytes2str(VIEW_TYPE_NUC_SEQS):
                if view_to_clone is not None:
                    if type(view_to_clone) == str:
                        view = obi_new_view_nuc_seqs_cloned_from_name(dms._pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments), quality_column)
                    else:
                        view = obi_new_view_nuc_seqs(dms._pointer, str2bytes(view_name), (<OBIView> view_to_clone)._pointer, line_selection_p, str2bytes(comments), quality_column)
                else:
                    view = obi_new_view_nuc_seqs(dms._pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments), quality_column)
            else:
                raise Exception(" View type not recognized ")
        # Else, open the existing view
        else:
            view = obi_open_view(dms._pointer, str2bytes(view_name))

        if view == NULL:
            raise Exception(" Error creating/opening a view ")

        return view

    def __repr__(self):
        # Name, comments and line count, then the repr of each column
        cdef str s
        s = str(self.name) + " \n " + str(self.comments) + " \n " + str(self.line_count) + " lines \n "
        for column_name in self._columns:
            s = s + repr(self._columns[column_name]) + ' \n '
        return s

    cpdef delete_column(self, str column_name):
        """Delete a column from the view and forget its Python object."""
        if obi_view_delete_column(self._pointer, str2bytes(column_name)) < 0:
            raise Exception(" Problem deleting a column from a view ")
        # Update the dictionary of column objects:
        (self._columns).pop(column_name)
        # The remaining column objects get their pointers refreshed
        self.update_column_pointers()

    cpdef add_column(self,
                     str column_name,
                     obiversion_t version_number=-1,
                     str alias=' ',
                     str type=' ',
                     index_t nb_lines=0,
                     index_t nb_elements_per_line=1,
                     list elements_names=None,
                     str indexer_name=" ",
                     str associated_column_name=" ",
                     obiversion_t associated_column_version=-1,
                     str comments=" ",
                     bint create=True
                     ):
        """Add a column to the view and register its Python object under its alias.

        ``type`` is the name of the data type; when it is given it must be one
        of the names tested below, otherwise an Exception is raised. An
        Exception is also raised when the C call adding the column fails.
        """
        cdef bytes column_name_b
        cdef bytes alias_b
        cdef bytes elements_names_b
        cdef object subclass
        cdef OBIDMS_column_p column_p
        # Explicit default so the value passed to C is defined even when no
        # type name is given.
        # NOTE(review): assumes 0 is the "undefined" value of OBIType_t - confirm against the C header.
        cdef OBIType_t data_type = <OBIType_t> 0

        column_name_b = str2bytes(column_name)
        if alias == ' ':
            # No alias given: the column is known under its own name
            alias = column_name
            alias_b = column_name_b
        else:
            alias_b = str2bytes(alias)

        if elements_names is None:
            elements_names_b = str2bytes(" ")
        else:
            elements_names_b = str2bytes(' ; '.join(elements_names))

        if type:  # TODO make C function that does that
            if type == ' OBI_INT ':
                data_type = OBI_INT
            elif type == ' OBI_FLOAT ':
                data_type = OBI_FLOAT
            elif type == ' OBI_BOOL ':
                data_type = OBI_BOOL
            elif type == ' OBI_CHAR ':
                data_type = OBI_CHAR
            elif type == ' OBI_QUAL ':
                data_type = OBI_QUAL
            elif type == ' OBI_STR ':
                data_type = OBI_STR
            elif type == ' OBI_SEQ ':
                data_type = OBI_SEQ
            else:
                raise Exception(" Invalid provided data type ")

        if (obi_view_add_column(self._pointer, column_name_b, version_number, alias_b,
                                data_type, nb_lines, nb_elements_per_line,
                                elements_names_b, str2bytes(indexer_name),
                                str2bytes(associated_column_name), associated_column_version,
                                str2bytes(comments), create) < 0):
            raise Exception(" Problem adding a column in a view ")

        # Get the column pointer
        column_p = obi_view_get_column(self._pointer, alias_b)
        # get_subclass_type reads the column header: do not hand it NULL
        if column_p == NULL:
            raise Exception(" Problem adding a column in a view ")

        # Open and store the subclass
        subclass = OBIDMS_column.get_subclass_type(column_p)
        (self._columns)[alias] = subclass(self, alias)

    cpdef change_column_alias(self, str current_alias, str new_alias):
        """Change the alias of a column, in the C view and in the column dictionary."""
        cdef OBIDMS_column column
        if (obi_view_create_column_alias(self._pointer, str2bytes(current_alias), str2bytes(new_alias)) < 0):
            raise Exception(" Problem changing a column alias ")
        # Update the dictionaries of column objects.
        # Pop first, then insert: this way the column stays registered even
        # when new_alias is equal to current_alias.
        column = (self._columns).pop(current_alias)
        column._alias = new_alias
        self._columns[new_alias] = column

    cpdef update_column_pointers(self):
        """Refresh the C pointer stored in every registered column object."""
        cdef str column_n
        cdef OBIDMS_column column
        for column_n in self._columns:
            column = self._columns[column_n]
            column._pointer = <OBIDMS_column_p*> obi_view_get_pointer_on_column_in_view(self._pointer, str2bytes(column_n))

    cpdef close(self):
        """Save and close the view, then invalidate this object."""
        if (obi_save_and_close_view(self._pointer) < 0):
            raise Exception(" Problem closing a view ")
        # Mark the view as invalidated and drop the column objects
        self._pointer = NULL
        self._columns = {}

    def __iter__(self):
        # Iteration on each line of all columns
        # Declarations
        cdef index_t line_nb
        cdef OBIView_line line

        # Yield each line
        for line_nb in range(self.line_count):
            line = self[line_nb]
            yield line

    def __getitem__(self, object item):
        # A str selects a column by alias, an int selects a line.
        # Any other type falls through and yields None.
        if type(item) == str:
            return (self._columns)[item]
        elif type(item) == int:
            return OBIView_line(self, item)

    def __contains__(self, str column_name):
        return (column_name in self._columns)

    def __len__(self):
        return (self.line_count)

    def __str__(self):
        # One str() per line, each followed by the separator; built with a
        # single join instead of repeated concatenation.
        return " " + "".join([str(line) + " \n " for line in self])

    # line_count property getter
    @property
    def line_count(self):
        return self._pointer.infos.line_count

    # name property getter
    @property
    def name(self):
        return bytes2str(self._pointer.infos.name)

    # view type property getter
    @property
    def type(self):
        return bytes2str(self._pointer.infos.view_type)

    # columns property getter
    @property
    def columns(self):
        return self._columns

    # comments property getter
    @property
    def comments(self):
        return bytes2str(self._pointer.infos.comments)
    # TODO setter that concatenates new comments?
2015-09-28 13:51:35 +02:00
2016-09-30 17:48:53 +02:00
######################################################################################################
2015-10-15 15:12:45 +02:00
2016-02-25 09:43:27 +01:00
cdef class OBIView_NUC_SEQS(OBIView):
    """View variant whose lines are handled as nucleotide sequence objects."""

    def __getitem__(self, object item):
        # A str selects a column by alias
        if type(item) == str:
            return (self._columns)[item]
        # An int selects a line, wrapped as a stored sequence
        if type(item) == int:
            return OBI_Nuc_Seq_Stored(self, item)
        # Any other type falls through and yields None

    def __setitem__(self, index_t line_idx, OBI_Nuc_Seq sequence_obj):
        # Copy every key of the sequence object into the line at line_idx
        for tag in sequence_obj:
            self[line_idx][tag] = sequence_obj[tag]

    # TODO discuss
    cpdef align(self, OBIView oview, OBIView iview2=None,
                double threshold=0.0, bint normalize=True, int reference=0, bint similarity_mode=True):
        """Placeholder: currently does nothing (the former body is commented out below)."""
        pass
#
# cdef OBIView iview1
#
# cdef Obiview_p iview1_p
# cdef Obiview_p iview2_p
# cdef Obiview_p oview_p
#
# cdef OBIDMS_column icol1
# cdef OBIDMS_column_p icol1_p
# cdef OBIDMS_column_p* icol1_pp
#
# cdef OBIDMS_column id1_col
# cdef OBIDMS_column_p id1_col_p
# cdef OBIDMS_column_p* id1_col_pp
#
# cdef OBIDMS_column id2_col
# cdef OBIDMS_column_p id2_col_p
# cdef OBIDMS_column_p* id2_col_pp
#
# cdef OBIDMS_column ocol
# cdef OBIDMS_column_p ocol_p
# cdef OBIDMS_column_p* ocol_pp
#
# cdef str id1_col_name
# cdef str id2_col_name
# cdef str score_col_name
#
# score_col_name = "score"
#
# iview1= self
# iview1_p = iview1._pointer
# icol1 = iview1[bytes2str(NUC_SEQUENCE_COLUMN)]
# icol1_pp = icol1._pointer
# icol1_p = icol1_pp[0]
#
# if obi_align_one_column(iview1_p, icol1_p, threshold, normalize, reference, similarity_mode) < 0 :
# raise Exception("Error aligning sequences")
2016-08-10 14:51:02 +02:00
2016-02-25 09:43:27 +01:00
2016-09-30 17:48:53 +02:00
######################################################################################################
2016-02-25 09:43:27 +01:00
2016-02-18 10:38:51 +01:00
cdef class OBIView_line:
    """Proxy on one line of a view, indexed by column alias."""

    def __init__(self, OBIView view, index_t line_nb):
        self._index = line_nb
        self._view = view

    def __getitem__(self, str column_name):
        # Value of this line in the given column
        return ((self._view)._columns)[column_name][self._index]

    def __setitem__(self, str column_name, object value):
        # Set the value of this line in a column. If the view has no column
        # with that alias, one is created first, with a type guessed from the
        # Python type of the value; an Exception is raised if no type can be
        # guessed.
        # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get)
        # TODO OBI_QUAL ?
        cdef type value_type
        cdef str value_obitype
        cdef bytes value_b

        if column_name not in self._view:
            # Identity test: an equality test could be intercepted by the
            # value's own __eq__.
            if value is None:
                raise Exception(" Trying to create a column from a None value (can ' t guess type) ")
            value_type = type(value)
            if value_type == int:
                value_obitype = ' OBI_INT '
            elif value_type == float:
                value_obitype = ' OBI_FLOAT '
            elif value_type == bool:
                value_obitype = ' OBI_BOOL '
            elif value_type == str or value_type == bytes:
                if value_type == str:
                    value_b = str2bytes(value)
                else:
                    value_b = value
                if only_ATGC(value_b):  # TODO detect IUPAC
                    value_obitype = ' OBI_SEQ '
                elif len(value) == 1:
                    value_obitype = ' OBI_CHAR '
                elif (len(value) > 1):
                    value_obitype = ' OBI_STR '
                else:
                    # Empty value not accepted by only_ATGC: nothing to guess
                    # from; never call add_column with an unset type.
                    raise Exception(" Could not guess the type of a value to create a new column ")
            else:
                raise Exception(" Could not guess the type of a value to create a new column ")
            self._view.add_column(column_name, type=value_obitype)

        (((self._view)._columns)[column_name]).set_line(self._index, value)

    def __iter__(self):
        # Iterate over the column aliases of the view
        for column_name in ((self._view)._columns):
            yield column_name

    def __contains__(self, str column_name):
        return (column_name in self._view._columns)

    def __repr__(self):
        # String form of a dictionary {column alias: value of this line}
        cdef dict line
        cdef str column_name
        line = {column_name: self[column_name] for column_name in self._view._columns}
        return str(line)
2015-10-15 15:12:45 +02:00
2016-09-30 17:48:53 +02:00
######################################################################################################
cdef class OBIView_line_selection(list):
    """List of line indices selected in a view.

    The selection keeps the view it was built on and the name of that view.
    """

    def __init__(self, OBIView view):
        # A closed view has its pointer reset to NULL: refuse to select lines in it
        if view._pointer == NULL:
            raise Exception(" Error: trying to create a line selection with an invalidated view ")
        self._view = view
        self._view_name = view.name

    def append(self, index_t idx):
        # Append a line index to the selection after checking that it
        # designates a line of the view.
        # Negative indices are rejected: when a view is built from a selection,
        # the indices are copied into a C array terminated by -1, so a negative
        # entry would be read as the end of the selection.
        if idx < 0:
            raise Exception("Error: trying to select a line with a negative index")
        if idx >= self._view.line_count:
            raise Exception(" Error: trying to select a line beyond the line count of a view ")
        # TODO discuss whether an index already in the selection should be skipped. Discuss order too
        super(OBIView_line_selection, self).append(idx)
######################################################################################################
2015-10-02 13:51:26 +02:00
2016-02-18 10:38:51 +01:00
cdef class OBIDMS:
    """Python handle on an OBIDMS, giving access to its views and taxonomies."""

    def __init__(self, str dms_name):
        # Declarations
        cdef bytes dms_name_b

        # Format the character string to send to C function
        dms_name_b = str2bytes(dms_name)

        # Fill structure and create or open the DMS
        self._pointer = obi_dms(<const_char_p> dms_name_b)
        if self._pointer == NULL:
            raise Exception(" Failed opening or creating an OBIDMS ")

    # name property getter
    @property
    def name(self):
        return bytes2str(self._pointer.dms_name)

    cpdef close(self):
        """Close the DMS; raise if the C call reports an error."""
        if (obi_close_dms(self._pointer)) < 0:
            raise Exception(" Problem closing an OBIDMS ")

    cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name):
        """Return an OBI_Taxonomy built on this DMS for the given taxonomy name."""
        return OBI_Taxonomy(self, taxo_name)

    cpdef OBIView open_view(self, str view_name):
        """Open an existing view with the Python class matching its recorded type."""
        cdef object view_class
        cdef dict view_infos

        view_infos = self.read_view_infos(view_name)

        if view_infos[" view_type "] == bytes2str(VIEW_TYPE_NUC_SEQS):
            view_class = OBIView_NUC_SEQS
        else:
            view_class = OBIView

        return view_class(self, view_name)

    cpdef OBIView new_view(self, str view_name, object view_to_clone=None, OBIView_line_selection line_selection=None, str view_type=None, str comments=" ", bint quality_column=False):
        """Create a new view, optionally cloned from another view or line selection.

        ``view_to_clone`` is either a view name (str) or a view object. The
        nucleotide sequences class and type are used when ``view_type`` asks
        for it, or when the cloned view or the view of the line selection is
        of that type.
        """
        cdef object view_class
        cdef str nuc_seqs_type
        cdef bint clone_is_nuc_seqs

        nuc_seqs_type = bytes2str(VIEW_TYPE_NUC_SEQS)

        # Get right subclass depending on view type.
        # Start from the generic class so that view_class is always set,
        # whatever combination of arguments is given.
        view_class = OBIView
        if view_type is not None and view_type == nuc_seqs_type:
            view_class = OBIView_NUC_SEQS

        # Check the type of the view to clone if there is one  # TODO make generic for future other view types
        clone_is_nuc_seqs = False
        if view_to_clone is not None:
            if type(view_to_clone) == str:
                clone_is_nuc_seqs = (self.read_view_infos(view_to_clone)[" view_type "] == nuc_seqs_type)
            else:
                clone_is_nuc_seqs = (view_to_clone.type == nuc_seqs_type)

        if clone_is_nuc_seqs or (line_selection is not None and (line_selection._view.type == nuc_seqs_type)):
            view_type = nuc_seqs_type
            view_class = OBIView_NUC_SEQS

        return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments, quality_column=quality_column, view_type=view_type)

    cpdef dict read_view_infos(self, str view_name):
        """Return the informations recorded for a view as a dictionary.

        The view file is mapped, its fields are copied into Python objects and
        the file is unmapped again, also when the copy raises.
        Raises an Exception if the view informations can not be mapped.
        """
        cdef Obiview_infos_p view_infos_p
        cdef dict view_infos_d
        cdef int j
        cdef str column_name

        view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name))
        # Every field below is read through this pointer.
        # NOTE(review): assumes the C function reports failure by returning NULL - confirm.
        if view_infos_p == NULL:
            raise Exception("Problem reading the informations of a view")

        try:
            view_infos_d = {}
            view_infos_d[" name "] = bytes2str(view_infos_p.name)
            view_infos_d[" comments "] = bytes2str(view_infos_p.comments)
            view_infos_d[" view_type "] = bytes2str(view_infos_p.view_type)
            view_infos_d[" column_count "] = <int> view_infos_p.column_count
            view_infos_d[" line_count "] = <int> view_infos_p.line_count
            view_infos_d[" created_from "] = bytes2str(view_infos_p.created_from)
            view_infos_d[" creation_date "] = bytes2str(obi_format_date(view_infos_p.creation_date))
            if (view_infos_p.all_lines):
                view_infos_d[" line_selection "] = None
            else:
                view_infos_d[" line_selection "] = {}
                view_infos_d[" line_selection "][" column_name "] = bytes2str((view_infos_p.line_selection).column_name)
                view_infos_d[" line_selection "][" version "] = <int> (view_infos_p.line_selection).version
            view_infos_d[" column_references "] = {}
            for j in range(view_infos_d[" column_count "]):
                column_name = bytes2str((view_infos_p.column_references[j]).alias)
                view_infos_d[" column_references "][column_name] = {}
                view_infos_d[" column_references "][column_name][" original_name "] = bytes2str((view_infos_p.column_references[j]).column_refs.column_name)
                view_infos_d[" column_references "][column_name][" version "] = (view_infos_p.column_references[j]).column_refs.version
        finally:
            # Always release the mapping, even if a conversion above failed
            obi_view_unmap_file(self._pointer, view_infos_p)

        return view_infos_d
# cpdef dict read_views(self) : # TODO function that prints the dic nicely and function that prints 1 view nicely. Add column type in col ref
#
# cdef Obiviews_infos_all_p all_views_p
# cdef Obiview_infos_p view_p
# cdef Column_reference_p column_refs
# cdef int nb_views
# cdef int i, j
# cdef str view_name
# cdef str column_name
# cdef dict views
# cdef bytes name_b
#
# views = {}
2016-09-28 16:56:44 +02:00
# all_views_p = obi_read_view_infos(self._pointer)
2016-06-30 11:41:30 +02:00
# if all_views_p == NULL :
# raise Exception("No views to read")
# nb_views = <int> (all_views_p.header).view_count
# for i in range(nb_views) :
# view_p = (<Obiview_infos_p> (all_views_p.view_infos)) + i
# view_name = bytes2str(view_p.name)
# views[view_name] = {}
# views[view_name]["comments"] = bytes2str(view_p.comments)
# views[view_name]["view_type"] = bytes2str(view_p.view_type)
# views[view_name]["column_count"] = <int> view_p.column_count
# views[view_name]["line_count"] = <int> view_p.line_count
# views[view_name]["view_number"] = <int> view_p.view_number
# views[view_name]["created_from"] = bytes2str(view_p.created_from)
# views[view_name]["creation_date"] = bytes2str(obi_format_date(view_p.creation_date))
# if (view_p.all_lines) :
# views[view_name]["line_selection"] = None
# else :
# views[view_name]["line_selection"] = {}
# views[view_name]["line_selection"]["column_name"] = bytes2str((view_p.line_selection).column_name)
# views[view_name]["line_selection"]["version"] = <int> (view_p.line_selection).version
# views[view_name]["column_references"] = {}
# column_refs = view_p.column_references
# for j in range(views[view_name]["column_count"]) :
# column_name = bytes2str((column_refs[j]).column_name)
# views[view_name]["column_references"][column_name] = {}
# views[view_name]["column_references"][column_name]["version"] = column_refs[j].version
#
# obi_close_view_infos(all_views_p);
#
# return views
2016-02-25 09:43:27 +01:00