Major changes: new Cython subclasses to handle columns with multiple
elements per line more efficiently; elements_names are now passed as a
list; new function to retrieve only the header of a column.
This commit is contained in:
Celine Mercier
2015-10-14 18:05:34 +02:00
parent 21923e213d
commit 0eaa5aa784
22 changed files with 749 additions and 516 deletions

View File

@ -6,37 +6,48 @@ from obitools3.utils cimport bytes2str, str2bytes
from .capi.obidms cimport obi_dms, \
obi_close_dms
from .capi.obidmscolumn cimport obi_column_get_data_type_from_name, \
obi_column_get_latest_version_from_name, \
obi_column_get_line_count_from_name, \
obi_column_get_nb_lines_used, \
from .capi.obidmscolumn cimport obi_column_get_nb_lines_used, \
obi_column_get_elements_names, \
obi_column_get_formatted_creation_date, \
obi_column_get_formatted_creation_date_from_name, \
obi_column_get_header_from_name, \
obi_unmap_header, \
obi_column_get_latest_version_from_name, \
obi_create_column, \
obi_clone_column, \
obi_open_column, \
obi_close_column
from .capi.obitypes cimport const_char_p, name_data_type
obi_close_column, \
OBIDMS_column_header_p
#obi_column_get_formatted_creation_date
from .capi.obitypes cimport const_char_p, \
name_data_type
from ._obidms cimport OBIDMS
from ._obidms cimport OBIDMS_column
from ._obidmscolumn_int cimport OBIDMS_column_int, \
OBIDMS_column_int_writable
OBIDMS_column_int_writable, \
OBIDMS_column_int_multi_elts, \
OBIDMS_column_int_multi_elts_writable
from ._obidmscolumn_float cimport OBIDMS_column_float, \
OBIDMS_column_float_writable
OBIDMS_column_float_writable, \
OBIDMS_column_float_multi_elts, \
OBIDMS_column_float_multi_elts_writable
from ._obidmscolumn_bool cimport OBIDMS_column_bool, \
OBIDMS_column_bool_writable
OBIDMS_column_bool_writable, \
OBIDMS_column_bool_multi_elts, \
OBIDMS_column_bool_multi_elts_writable
from ._obidmscolumn_char cimport OBIDMS_column_char, \
OBIDMS_column_char_writable
OBIDMS_column_char_writable, \
OBIDMS_column_char_multi_elts, \
OBIDMS_column_char_multi_elts_writable
from ._obidmscolumn_idx cimport OBIDMS_column_idx, \
OBIDMS_column_idx_writable
# from ._obidmscolumn_idx cimport OBIDMS_column_idx, \
# OBIDMS_column_idx_writable, \
# OBIDMS_column_idx_multi_elts, \
# OBIDMS_column_idx_multi_elts_writable
cdef class OBIDMS :
@ -72,6 +83,7 @@ cdef class OBIDMS :
cdef str creation_date
cdef obiversion_t latest_version
cdef size_t line_count
cdef OBIDMS_column_header_p header
p = Path(self.dms_name+'.obidms')
@ -85,11 +97,12 @@ cdef class OBIDMS :
column_name = entry.stem
column_name_b = str2bytes(column_name)
dms[column_name] = {}
data_type = bytes2str(name_data_type(obi_column_get_data_type_from_name(self.pointer, column_name_b)))
header = obi_column_get_header_from_name(self.pointer, column_name_b)
data_type = bytes2str(name_data_type(header.data_type))
line_count = header.line_count
latest_version = obi_column_get_latest_version_from_name(self.pointer, column_name_b)
line_count = obi_column_get_line_count_from_name(self.pointer, column_name_b)
# creation_date = bytes2str(obi_column_get_formatted_creation_date_from_name(self.pointer, column_name_b)) #TODO
# creation_date = bytes2str(obi_column_get_formatted_creation_date_from_name(self.pointer, column_name_b)) #TODO (deprecated, use header)
# print(creation_date)
dms[column_name]['data_type'] = data_type
@ -98,6 +111,7 @@ cdef class OBIDMS :
# dms[column_name]['creation_date'] = creation_date
print("{:<25} {:<25} {:<25} {:<25}".format(column_name, data_type, latest_version, line_count))
obi_unmap_header(header) # TODO check if error? but C will already warn and there's nothing to do
return dms
@ -108,97 +122,110 @@ cdef class OBIDMS :
obiversion_t version_number=-1,
OBIType_t data_type= <OBIType_t> 0,
size_t nb_lines=0,
size_t nb_elements_per_line=1,
str elements_names=None):
size_t nb_elements_per_line=0,
list elements_names=None):
# Declarations
cdef OBIDMS_column column
cdef object subclass # TODO object?
cdef bytes column_name_b
cdef OBIDMS_column_header_p header
header = NULL
# Format the character string to send to C function
column_name_b = str2bytes(column_name)
# Get the header of the latest version of the column if
# some needed information is not provided
if ((not data_type or not nb_elements_per_line) and not create) :
header = obi_column_get_header_from_name(self.pointer, column_name_b)
# Get the data type if not provided
if not data_type :
if create :
raise Exception("A data type must be specified")
else :
data_type = obi_column_get_data_type_from_name(self.pointer, column_name_b)
data_type = header.data_type
# Open the column with the right subclass depending on the data type and the mode (read-only or writable)
# Get the number of elements per line if not provided and needed
if not nb_elements_per_line :
if create : # Set to one if not provided (default value)
nb_elements_per_line = 1
else :
nb_elements_per_line = header.nb_elements_per_line
if nb_elements_per_line > 1 :
elements_names = bytes2str(header.elements_names).split(';')
if header != NULL :
obi_unmap_header(header) # TODO check if error? but C will already warn and there's nothing to do
# Open the column with the right subclass depending on the data type, the mode
# (read-only or writable) and whether there are multiple elements per line or not
if data_type == 1 :
if (create or clone) :
column = OBIDMS_column_int_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_int_writable
else :
subclass = OBIDMS_column_int_multi_elts_writable
else :
column = OBIDMS_column_int(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_int
else :
subclass = OBIDMS_column_int_multi_elts
elif data_type == 2 :
if (create or clone) :
column = OBIDMS_column_float_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_float_writable
else :
subclass = OBIDMS_column_float_multi_elts_writable
else :
column = OBIDMS_column_float(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_float
else :
subclass = OBIDMS_column_float_multi_elts
elif data_type == 3 :
if (create or clone) :
column = OBIDMS_column_bool_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_bool_writable
else :
subclass = OBIDMS_column_bool_multi_elts_writable
else :
column = OBIDMS_column_bool(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_bool
else :
subclass = OBIDMS_column_bool_multi_elts
elif data_type == 4 :
if (create or clone) :
column = OBIDMS_column_char_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_char_writable
else :
subclass = OBIDMS_column_char_multi_elts_writable
else :
column = OBIDMS_column_char(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_char
else :
subclass = OBIDMS_column_char_multi_elts
# elif data_type == 5 :
# if (create or clone) :
# column = OBIDMS_column_idx_writable(self, column_name,
# create, clone, clone_data,
# version_number, data_type,
# nb_lines, nb_elements_per_line,
# elements_names)
# if nb_elements_per_line == 1 :
# subclass = OBIDMS_column_idx_writable
# else :
# subclass = OBIDMS_column_idx_multi_elts_writable
# else :
# column = OBIDMS_column_idx(self, column_name,
# create, clone, clone_data,
# version_number, data_type,
# nb_lines, nb_elements_per_line,
# elements_names)
# if nb_elements_per_line == 1 :
# subclass = OBIDMS_column_idx
# else :
# subclass = OBIDMS_column_idx_multi_elts
else :
raise Exception("Problem with the data type")
column = subclass(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
return column
@ -215,7 +242,7 @@ cdef class OBIDMS_column :
OBIType_t type,
size_t nb_lines,
size_t nb_elements_per_line,
str elements_names):
list elements_names):
# Declarations
cdef bytes column_name_b
@ -226,6 +253,8 @@ cdef class OBIDMS_column :
self.dms = dms
self.data_type = bytes2str(name_data_type(type))
self.column_name = column_name
self.nb_elements_per_line = nb_elements_per_line
self.elements_names = elements_names
# Format the character strings to send them to C functions
column_name_b = str2bytes(column_name)
@ -236,7 +265,7 @@ cdef class OBIDMS_column :
if elements_names == None :
elements_names_b = column_name_b
else :
elements_names_b = str2bytes(elements_names)
elements_names_b = str2bytes(";".join(elements_names))
self.pointer = obi_create_column(self.dms.pointer, column_name_b, type, nb_lines, nb_elements_per_line, elements_names_b)
else :
if clone :
@ -248,49 +277,28 @@ cdef class OBIDMS_column :
def __iter__(self):
# Declarations
cdef list elements_names
cdef str element_name
cdef bint multiple_elements
cdef object line # TODO
cdef size_t lines_used
cdef size_t line_nb
# Check if there are multiple elements per line and, if so, get their names
elements_names = self.get_elements_names()
if len(elements_names) > 1 :
multiple_elements = True
else :
element_name = elements_names[0]
# Yield each line
lines_used = obi_column_get_nb_lines_used(self.pointer)
for line_nb in xrange(lines_used):
if multiple_elements :
line = []
for element_name in elements_names :
line.append(self.get_item(line_nb, element_name))
else :
line = self.get_item(line_nb, element_name)
yield line
for line_nb in range(lines_used):
yield self.get_line(line_nb)
def __setitem__(self, size_t line_nb, object value):
self.set_item(line_nb, "", value)
self.set_line(line_nb, value)
def __getitem__(self, size_t line_nb):
return self.get_item(line_nb, "")
return self.get_line(line_nb)
cpdef object get_item(self, size_t line_nb, str element_name):
raise NotImplementedError
# cpdef object get_item(self, size_t line_nb, str element_name): TODO
# raise NotImplementedError
# cpdef set_item(self, size_t line_nb, str element_name, object value): TODO
# raise NotImplementedError
cpdef list get_elements_names(self):
cdef bytes elements_names
elements_names = obi_column_get_elements_names(self.pointer)
return (bytes2str(elements_names)).split(';')
return self.elements_names
cpdef str get_data_type(self):
return self.data_type
@ -298,8 +306,8 @@ cdef class OBIDMS_column :
cpdef size_t get_nb_lines_used(self):
return obi_column_get_nb_lines_used(self.pointer)
cpdef str get_creation_date(self):
return bytes2str(obi_column_get_formatted_creation_date(self.pointer))
# cpdef str get_creation_date(self):
# return bytes2str(obi_column_get_formatted_creation_date(self.pointer))
cpdef close(self):
raise NotImplementedError