Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git

This commit is contained in:
2017-07-28 15:57:01 +02:00
10 changed files with 178 additions and 94 deletions

View File

@ -18,16 +18,16 @@ import string
import random
VIEW_TYPES = ["", "NUC_SEQS_VIEW"]
VIEW_TYPES = [b"", b"NUC_SEQS_VIEW"]
COL_TYPES = [OBI_INT, OBI_FLOAT, OBI_BOOL, OBI_CHAR, OBI_STR, OBI_SEQ]
NUC_SEQUENCE_COLUMN = "NUC_SEQ"
ID_COLUMN = "ID"
DEFINITION_COLUMN = "DEFINITION"
QUALITY_COLUMN = "QUALITY"
NUC_SEQUENCE_COLUMN = b"NUC_SEQ"
ID_COLUMN = b"ID"
DEFINITION_COLUMN = b"DEFINITION"
QUALITY_COLUMN = b"QUALITY"
SPECIAL_COLUMNS = [NUC_SEQUENCE_COLUMN, ID_COLUMN, DEFINITION_COLUMN, QUALITY_COLUMN]
#TAXDUMP = "" TODO path=?
TAXTEST = "taxtest"
TAXTEST = b"taxtest"
NAME_MAX_LEN = 200
COL_COMMENTS_MAX_LEN = 2048
@ -94,7 +94,7 @@ def random_str_with_max_len(max_len):
def random_bytes_with_max_len(max_len):
return str2bytes(''.join(random.choice(string.ascii_lowercase) for i in range(random_length(max_len))))
return str2bytes(random_str_with_max_len(max_len))
def random_column(infos):
@ -102,17 +102,17 @@ def random_column(infos):
def random_unique_name(infos):
name = ""
while name == "" or name in infos['unique_names'] :
name = random_str_with_max_len(NAME_MAX_LEN)
name = b""
while name == b"" or name in infos['unique_names'] :
name = random_bytes_with_max_len(NAME_MAX_LEN)
infos['unique_names'].append(name)
return name
def random_unique_element_name(config, infos):
name = ""
while name == "" or name in infos['unique_names'] :
name = random_str_with_max_len(config['test']['elt_name_max_len'])
name = b""
while name == b"" or name in infos['unique_names'] :
name = random_bytes_with_max_len(config['test']['elt_name_max_len'])
infos['unique_names'].append(name)
return name
@ -128,7 +128,7 @@ def test_set_and_get(config, infos):
col = infos['view'][col_name]
element_names = col.elements_names
data_type = col.data_type
if data_type == "OBI_QUAL" :
if data_type == b"OBI_QUAL" :
print_test(config, "-")
return
idx = random_int(config)
@ -217,7 +217,7 @@ def fill_column(config, infos, col) :
def create_random_column(config, infos) :
alias = random.choice(['', random_unique_name(infos)])
alias = random.choice([b'', random_unique_name(infos)])
nb_elements_per_line=random.randint(1, config['test']['maxelts'])
elements_names = []
for i in range(nb_elements_per_line) :
@ -235,7 +235,7 @@ def create_random_column(config, infos) :
alias=alias
)
if alias != '' :
if alias != b'' :
assert infos['view'][alias] == column
else :
assert infos['view'][name] == column
@ -257,7 +257,7 @@ def random_new_view(config, infos, first=False):
infos['view_names'].append(infos['view'].name)
infos['view'].close()
v_to_clone = View.open(infos['dms'], random.choice(infos["view_names"]))
v_type = ""
v_type = b""
print_test(config, "View to clone: ")
print_test(config, repr(v_to_clone))
create_line_selection = random_bool(config)
@ -271,15 +271,15 @@ def random_new_view(config, infos, first=False):
v_type = random_view_type()
if line_selection is not None :
infos['view'] = line_selection.materialize(random_unique_name(infos), comments=random_str_with_max_len(config['test']['commentsmaxlen']))
infos['view'] = line_selection.materialize(random_unique_name(infos), comments=random_bytes_with_max_len(config['test']['commentsmaxlen']))
elif v_to_clone is not None :
infos['view'] = v_to_clone.clone(random_unique_name(infos), comments=random_str_with_max_len(config['test']['commentsmaxlen']))
infos['view'] = v_to_clone.clone(random_unique_name(infos), comments=random_bytes_with_max_len(config['test']['commentsmaxlen']))
else :
if v_type == "NUC_SEQS_VIEW" :
infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_str_with_max_len(config['test']['commentsmaxlen'])) # TODO quality column
infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_bytes_with_max_len(config['test']['commentsmaxlen'])) # TODO quality column
else :
infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_str_with_max_len(config['test']['commentsmaxlen'])) # TODO quality column
infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_bytes_with_max_len(config['test']['commentsmaxlen'])) # TODO quality column
print_test(config, repr(infos['view']))
if v_to_clone is not None :
if line_selection is None:

View File

@ -20,7 +20,6 @@ cdef class Column(OBIWrapper) :
cdef bytes _alias
cdef inline OBIDMS_column_p pointer(self)
cpdef close(self)
@staticmethod
cdef type get_column_class(obitype_t obitype, bint multi_elts)

View File

@ -20,7 +20,7 @@ from ..capi.obiview cimport obi_view_add_column, \
obi_view_get_pointer_on_column_in_view, \
Obiview_p
from ..object cimport OBIObjectClosedInstance
from ..object cimport OBIDeactivatedInstanceError
from obitools3.utils cimport tobytes, \
bytes2str, \
@ -76,7 +76,7 @@ cdef class Column(OBIWrapper) :
cdef char* elements_names_p
if not view.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
if alias_b == b"" :
alias_b = column_name_b
@ -117,7 +117,7 @@ cdef class Column(OBIWrapper) :
cdef type column_class
if not view.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
column_pp = obi_view_get_pointer_on_column_in_view(view.pointer(),
column_name_b)
@ -139,16 +139,17 @@ cdef class Column(OBIWrapper) :
return column
@OBIWrapper.checkIsActive
def add_to_view(self,
View view,
View view,
object column_name=None) :
cdef bytes alias
cdef OBIDMS_column_p column_p = self.pointer()
if not view.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
if (column_name is None):
alias = self._alias
@ -177,7 +178,8 @@ cdef class Column(OBIWrapper) :
view.register(self)
@OBIWrapper.checkIsActive
def __len__(self):
'''
implements the len() function for the Column class
@ -186,7 +188,8 @@ cdef class Column(OBIWrapper) :
'''
return self.lines_used
@OBIWrapper.checkIsActive
def __sizeof__(self):
'''
returns the size of the C object wrapped by the Column instance
@ -194,21 +197,26 @@ cdef class Column(OBIWrapper) :
cdef OBIDMS_column_header_p header = self.pointer().header
return header.header_size + header.data_size
@OBIWrapper.checkIsActive
def __iter__(self):
    '''
    Yields the value of each used line of the column, in line order,
    as returned by `get_line`. The number of lines iterated over is the
    value of `lines_used` read when the iteration starts.
    '''
    cdef index_t line_nb = 0
    cdef index_t nb_lines = self.lines_used

    while line_nb < nb_lines :
        yield self.get_line(line_nb)
        line_nb += 1
# TODO check time efficiency with and without
@OBIWrapper.checkIsActive
def __setitem__(self, index_t line_nb, object value):
    '''
    Implements `column[line_nb] = value` by delegating to `set_line`.
    '''
    self.set_line(line_nb, value)
@OBIWrapper.checkIsActive
def __getitem__(self, index_t line_nb):
    '''
    Implements `column[line_nb]` by delegating to `get_line`.
    '''
    return self.get_line(line_nb)
@OBIWrapper.checkIsActive
def __str__(self) :
cdef str to_print
cdef object line
@ -217,14 +225,15 @@ cdef class Column(OBIWrapper) :
to_print = to_print + str(line) + "\n"
return to_print
@OBIWrapper.checkIsActive
def __repr__(self) :
    '''
    Returns a one-line description of the column as a str: its alias,
    original name, version and data type.
    '''
    # The pieces are listed (and therefore evaluated) in output order.
    cdef list parts = [self._alias,
                       b", original name: ", self.original_name,
                       b", version ", str2bytes(str(self.version)),
                       b", data type: ", self.data_type]
    cdef bytes s = b"".join(parts)
    return bytes2str(s)
cpdef close(self): # TODO discuss, can't be called bc then bug when closing view that tries to close it in C
def close(self): # TODO discuss, can't be called bc then bug when closing view that tries to close it in C
cdef OBIDMS_column_p pointer
@ -239,54 +248,76 @@ cdef class Column(OBIWrapper) :
# Column alias property getter and setter
@property
def name(self):
    '''
    Alias of the column (the `_alias` attribute).
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return self._alias
    raise OBIDeactivatedInstanceError()
@name.setter
def name(self, object new_alias): # @DuplicatedSignature
    '''
    Renames the column by asking its view to rename the current alias
    to `new_alias` (`rename_column`).
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        self._view.rename_column(self._alias, new_alias)
    else :
        raise OBIDeactivatedInstanceError()
# elements_names property getter
@property
def elements_names(self):
    '''
    List of the element names of the column, obtained by splitting the
    result of `obi_get_elements_names` on b';'.
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return obi_get_elements_names(self.pointer()).split(b';')
    raise OBIDeactivatedInstanceError()
# nb_elements_per_line property getter
@property
def nb_elements_per_line(self):
    '''
    Value of the `nb_elements_per_line` field of the column header.
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return self.pointer().header.nb_elements_per_line
    raise OBIDeactivatedInstanceError()
# data_type property getter
@property
def data_type(self):
    '''
    Name of the column data type, i.e. `name_data_type` applied to
    `data_type_int`.
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return name_data_type(self.data_type_int)
    raise OBIDeactivatedInstanceError()
# data_type integer code property getter
@property
def data_type_int(self):
    '''
    Integer code of the column data type (the `returned_data_type`
    field of the column header).
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return self.pointer().header.returned_data_type
    raise OBIDeactivatedInstanceError()
# original_name property getter
@property
def original_name(self):
    '''
    Value of the `name` field of the column header.
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return self.pointer().header.name
    raise OBIDeactivatedInstanceError()
# version property getter
@property
def version(self):
    '''
    Value of the `version` field of the column header.
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return self.pointer().header.version
    raise OBIDeactivatedInstanceError()
# lines_used property getter
@property
def lines_used(self):
    '''
    Value of the `lines_used` field of the column header.
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return self.pointer().header.lines_used
    raise OBIDeactivatedInstanceError()
# comments property getter
@property
def comments(self):
    '''
    Value of the `comments` field of the column header.
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return self.pointer().header.comments
    raise OBIDeactivatedInstanceError()
# creation_date property getter
@property
def creation_date(self):
    '''
    Creation date of the column: the `creation_date` field of the
    column header passed through `obi_format_date`.
    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return obi_format_date(self.pointer().header.creation_date)
    raise OBIDeactivatedInstanceError()
@ -295,10 +326,14 @@ cdef class Column(OBIWrapper) :
cdef class Column_multi_elts(Column) :
@OBIWrapper.checkIsActive
def __getitem__(self, index_t line_nb):
    '''
    Implements `column[line_nb]` for a multi-element column: returns a
    `Column_line` built from this column and the line number.
    '''
    return Column_line(self, line_nb)
cpdef set_line(self, index_t line_nb, object values):
cdef object element_name
if not self.active() :
raise OBIDeactivatedInstanceError()
if values is None :
for element_name in self.elements_names :
self.set_item(line_nb, element_name, None)

View File

@ -17,11 +17,12 @@ from obitools3.utils cimport bytes2str, \
tobytes, \
tostr
from .object cimport OBIObjectClosedInstance
from .object cimport OBIDeactivatedInstanceError
from pathlib import Path
from .view import view
from .object import OBIWrapper
cdef class DMS(OBIWrapper):
@ -102,6 +103,7 @@ cdef class DMS(OBIWrapper):
return <bytes> self.pointer().dms_name
@OBIWrapper.checkIsActive
def keys(self) :
cdef const_char_p path = obi_dms_get_full_path(self.pointer(), b"VIEWS")
@ -117,18 +119,21 @@ cdef class DMS(OBIWrapper):
yield str2bytes(v.stem)
@OBIWrapper.checkIsActive
def values(self) :
    '''
    Yields, for each view name produced by `keys()`, the object
    returned by `get_view` for that name.
    '''
    cdef bytes name_b

    for name_b in self.keys() :
        yield self.get_view(name_b)
@OBIWrapper.checkIsActive
def items(self) :
    '''
    Yields a `(view_name, view)` tuple for each view name produced by
    `keys()`, the view being obtained with `get_view`.
    '''
    cdef bytes name_b

    for name_b in self.keys() :
        yield (name_b, self.get_view(name_b))
@OBIWrapper.checkIsActive
def __contains__(self, key) :
cdef str key_s = tostr(key)
@ -145,18 +150,22 @@ cdef class DMS(OBIWrapper):
return PyList_Size(list(self.keys()))
@OBIWrapper.checkIsActive
def __len__(self) :
    '''
    Implements `len(dms)`: the value returned by `view_count()`.
    '''
    return self.view_count()
@OBIWrapper.checkIsActive
def __getitem__(self, object view_name):
    '''
    Implements `dms[view_name]` by delegating to `get_view`.
    '''
    return self.get_view(view_name)
@OBIWrapper.checkIsActive
def __iter__(self) :
    '''
    Iterating over a DMS iterates over its view names: returns the
    iterator produced by `keys()`.
    '''
    return self.keys()
@OBIWrapper.checkIsActive
def get_view(self, object view_name) :
    '''
    Opens the view called `view_name` in this DMS with `View.open` and
    returns it.
    '''
    return view.View.open(self, view_name)

View File

@ -10,7 +10,6 @@ cdef class OBIObject:
cdef register(self, OBIObject object)
cdef unregister(self, OBIObject object)
cpdef close(self)
cdef class OBIWrapper(OBIObject):
@ -24,5 +23,5 @@ cdef class OBIWrapper(OBIObject):
cdef object new_wrapper(type constructor, void* pointer)
cdef class OBIObjectClosedInstance(Exception):
cdef class OBIDeactivatedInstanceError(Exception):
pass

View File

@ -1,5 +1,7 @@
#cython: language_level=3
import functools
__c_cython_mapping__ = {}
@ -26,7 +28,7 @@ cdef class OBIObject:
del self._dependent_objects[id(object)]
cpdef close(self):
def close(self):
cdef OBIObject object
cdef list to_close = list((self._dependent_objects).values())
@ -42,6 +44,18 @@ cdef class OBIWrapper(OBIObject) :
The OBIWrapper class enables to wrap a C object representing a DMS or an element from a DMS.
'''
@staticmethod
def checkIsActive(instance):
    '''
    Decorator guarding a method against being called on a deactivated
    wrapper (associated pointer NULL).

    `instance` is the method being decorated. The returned wrapper
    raises OBIDeactivatedInstanceError when the `dead` property of the
    receiver is true; otherwise it forwards every argument to the
    decorated method and returns its result.
    '''
    def guarded(self, *args, **kargs):
        if not self.dead:
            return instance(self, *args, **kargs)
        raise OBIDeactivatedInstanceError()

    # Copy the name, docstring, etc. of the decorated method onto the wrapper
    return functools.wraps(instance)(guarded)
cdef inline size_t cid(self) :
return <size_t>(self._pointer)
@ -50,7 +64,7 @@ cdef class OBIWrapper(OBIObject) :
return self._pointer != NULL
cpdef close(self):
def close(self):
if (self._pointer != NULL):
OBIObject.close(self)
del __c_cython_mapping__[<size_t>self._pointer]
@ -68,6 +82,9 @@ cdef class OBIWrapper(OBIObject) :
'''
self.close()
@property
def dead(self):
    '''
    True when the wrapped C pointer is NULL, False otherwise.
    '''
    return self._pointer == NULL
@staticmethod
cdef object new_wrapper(type constructor, void* pointer) :

View File

@ -16,7 +16,6 @@ cdef class Taxonomy(OBIWrapper) :
cpdef get_taxon_by_idx(self, int idx)
cpdef write(self, str prefix)
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=*)
cpdef close(self)
cdef class Taxon :
cdef ecotx_t* _pointer

View File

@ -108,7 +108,7 @@ cdef class Taxonomy(OBIWrapper) :
return taxid
cpdef close(self) :
def close(self) :
cdef OBIDMS_taxonomy_p pointer = self.pointer()

View File

@ -20,7 +20,7 @@ cdef class View(OBIWrapper):
cdef DMS _dms
cdef inline Obiview_p pointer(self)
cpdef delete_column(self,
object column_name)

View File

@ -23,7 +23,7 @@ from obitools3.utils cimport tobytes, \
bytes2str, \
tostr
from ..object cimport OBIObjectClosedInstance
from ..object cimport OBIDeactivatedInstanceError
from obitools3.dms.view import typed_view
@ -92,14 +92,13 @@ cdef class View(OBIWrapper) :
object view_name,
object comments=None):
cdef bytes view_name_b = tobytes(view_name)
cdef bytes comments_b
cdef void* pointer
cdef View view
if not self.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
if comments is not None:
comments_b = tobytes(comments)
@ -149,7 +148,7 @@ cdef class View(OBIWrapper) :
return view
cpdef close(self):
def close(self):
cdef Obiview_p pointer = self.pointer()
if self.active() :
@ -160,34 +159,29 @@ cdef class View(OBIWrapper) :
bytes2str(self.name))
def __repr__(self) :
# TODO check everywhere
if not self.active() :
raise OBIObjectClosedInstance()
@OBIWrapper.checkIsActive
def __repr__(self) :
cdef str s = "{name:s}\n{comments:s}\n{line_count:d} lines\n".format(name = str(self.name),
comments = str(self.comments),
line_count = self.line_count)
for column_name in self.keys() :
s = s + repr(self[column_name]) + '\n'
return s
def keys(self):
cdef str col_alias
cdef bytes col_alias
cdef int i
cdef Obiview_p pointer = self.pointer()
cdef int nb_column = pointer.infos.column_count
cdef Alias_column_pair_p column_p = pointer.infos.column_references
if not self.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
for i in range(nb_column) :
col_alias = bytes2str(column_p[i].alias)
col_alias = column_p[i].alias
yield col_alias
@ -195,9 +189,9 @@ cdef class View(OBIWrapper) :
object column_name):
if not self.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
return Column.open(self, column_name)
return Column.open(self, tobytes(column_name))
def get_column_with_idx(self,
@ -207,24 +201,24 @@ cdef class View(OBIWrapper) :
cdef int nb_column = pointer.infos.column_count
if not self.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
if column_idx > nb_column :
raise IndexError(column_idx, "No column with this index")
return Column.open(self, pointer.infos.column_references[column_idx].alias)
cpdef delete_column(self,
object column_name) :
cdef bytes column_name_b = tobytes(column_name)
if not self.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
# Close the cython instance first
col = self[column_name]
col = self[column_name_b]
col.close()
# Remove the column from the view which closes the C structure
@ -242,7 +236,7 @@ cdef class View(OBIWrapper) :
cdef bytes new_name_b = tobytes(new_name)
if not self.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
if (obi_view_create_column_alias(self.pointer(),
tobytes(current_name_b),
@ -262,8 +256,12 @@ cdef class View(OBIWrapper) :
cdef Column old_column
cdef Column new_column
cdef index_t length = len(self)
old_column = self.get_column(column_name)
cdef column_name_b = tobytes(column_name)
if not self.active() :
raise OBIDeactivatedInstanceError()
old_column = self.get_column(column_name_b)
if new_data_type == 0 :
new_data_type = old_column.data_type
@ -276,24 +274,27 @@ cdef class View(OBIWrapper) :
new_column = Column.new_column(self, old_column.pointer().header.name, new_data_type,
nb_elements_per_line=new_nb_elements_per_line, elements_names=new_elements_names,
comments=old_column.comments, alias=tobytes(column_name)+tobytes('___new___'))
comments=old_column.comments, alias=column_name_b+tobytes('___new___'))
for i in range(length) :
new_column[i] = old_column[i]
# Remove old column from view
self.delete_column(column_name)
self.delete_column(column_name_b)
# Rename new
new_column.name = column_name
new_column.name = column_name_b
return new_column
cpdef Line_selection new_selection(self,list lines=None):
cpdef Line_selection new_selection(self, list lines=None):
if not self.active() :
raise OBIDeactivatedInstanceError()
return Line_selection(self, lines)
@OBIWrapper.checkIsActive
def __iter__(self):
# Iteration on each line of all columns
@ -304,14 +305,17 @@ cdef class View(OBIWrapper) :
for line_nb in range(self.line_count) :
yield Line(self, line_nb)
# TODO test time gain without
@OBIWrapper.checkIsActive
def __getitem__(self, object ref) :
    '''
    Implements `view[ref]`.

    An integer `ref` is treated as a line number and returns the
    corresponding `Line`; any other `ref` is passed to `get_column`
    as a column name.
    '''
    # isinstance rather than an exact type comparison, so that
    # subclasses of int are also treated as line numbers.
    if isinstance(ref, int) :
        return Line(self, ref)
    # TODO assume str or bytes for optimization (discuss)
    return self.get_column(ref)    # TODO very slow in practice
@OBIWrapper.checkIsActive
def __setitem__(self, index_t idx, object item) :
cdef Column col
line = self[idx]
@ -329,15 +333,18 @@ cdef class View(OBIWrapper) :
)
line[k] = item[k]
def __contains__(self, str column_name):
return (column_name in self.keys())
@OBIWrapper.checkIsActive
def __contains__(self, object column_name):
return (tobytes(column_name) in self.keys())
@OBIWrapper.checkIsActive
def __len__(self):
    '''
    Implements `len(view)`: the value of the `line_count` property.
    '''
    return self.line_count
@OBIWrapper.checkIsActive
def __str__(self) :
cdef Line line
cdef str to_print
@ -350,36 +357,48 @@ cdef class View(OBIWrapper) :
# Width (column count) property getter
@property
def width(self):
    '''
    Column count of the view (the `column_count` field of the view
    infos).
    Raises OBIDeactivatedInstanceError if the view is not active.
    '''
    if self.active() :
        return self.pointer().infos.column_count
    raise OBIDeactivatedInstanceError()
# DMS property getter
@property
def dms(self):
    '''
    The DMS stored in the `_dms` attribute of the view.
    Raises OBIDeactivatedInstanceError if the view is not active.
    '''
    if self.active() :
        return self._dms
    raise OBIDeactivatedInstanceError()
# line_count property getter
@property
def line_count(self):
    '''
    Value of the `line_count` field of the view infos.
    Raises OBIDeactivatedInstanceError if the view is not active.
    '''
    if self.active() :
        return self.pointer().infos.line_count
    raise OBIDeactivatedInstanceError()
# name property getter
@property
def name(self):
    '''
    Name of the view (the `name` field of the view infos), as bytes.
    Raises OBIDeactivatedInstanceError if the view is not active.
    '''
    if self.active() :
        return <bytes> self.pointer().infos.name
    raise OBIDeactivatedInstanceError()
# view type property getter
@property
def type(self): # @ReservedAssignment
    '''
    Type of the view (the `view_type` field of the view infos), as
    bytes.
    Raises OBIDeactivatedInstanceError if the view is not active.
    '''
    if self.active() :
        return <bytes> self.pointer().infos.view_type
    raise OBIDeactivatedInstanceError()
# comments property getter
@property
def comments(self):
    '''
    Comments of the view (the `comments` field of the view infos), as
    bytes.
    Raises OBIDeactivatedInstanceError if the view is not active.
    '''
    if self.active() :
        return <bytes> self.pointer().infos.comments
    raise OBIDeactivatedInstanceError()
# TODO setter that concatenates new comments?
@ -391,20 +410,21 @@ cdef class Line :
self._view = view
def __getitem__(self, str column_name) :
return (self._view).get_column(column_name)[self._index]
def __getitem__(self, object column_name) :
return (self._view).get_column(tobytes(column_name))[self._index]
def __setitem__(self, object column_name_, object value): # TODO discuss
def __setitem__(self, object column_name, object value): # TODO discuss
# TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get)
# TODO OBI_QUAL ?
cdef type value_type
cdef obitype_t value_obitype
cdef bytes value_b
cdef bytes column_name_b
column_name = tostr(column_name_) # TODO
column_name_b = tobytes(column_name)
if column_name not in self._view :
if column_name_b not in self._view :
if value == None :
raise Exception("Trying to create a column from a None value (can't guess type)")
value_type = type(value)
@ -428,27 +448,33 @@ cdef class Line :
else :
raise Exception("Could not guess the type of a value to create a new column")
Column.new_column(self._view, column_name, value_obitype)
Column.new_column(self._view, column_name_b, value_obitype)
(self._view).get_column(column_name).set_line(self._index, value)
(self._view).get_column(column_name_b).set_line(self._index, value)
def __iter__(self):
    '''
    Iterating over a line yields the column names (bytes) produced by
    the `keys()` of the view the line belongs to.
    '''
    cdef bytes alias_b

    for alias_b in (self._view).keys() :
        yield alias_b
def keys(self):
    '''
    Returns the result of `keys()` called on the view the line belongs
    to.
    '''
    return (self._view).keys()
def __contains__(self, str column_name):
return (column_name in self.keys())
def __contains__(self, object column_name):
return (tobytes(column_name) in self.keys())
def __repr__(self):
cdef dict line
cdef str column_name
cdef dict line
cdef bytes column_name_b
cdef str column_name_str
line = {}
for column_name in self._view.keys() :
line[column_name] = self[column_name]
for column_name_b in self._view.keys() :
column_name_str = bytes2str(column_name_b)
line[column_name_str] = self[column_name_str]
return str(line)
# View property getter
@ -559,7 +585,7 @@ cdef class Line_selection(list):
cdef View view
if not self._view.active() :
raise OBIObjectClosedInstance()
raise OBIDeactivatedInstanceError()
if comments is not None:
comments_b = tobytes(comments)
@ -568,7 +594,7 @@ cdef class Line_selection(list):
pointer = obi_clone_view(self._view._dms.pointer(),
self._view.pointer(),
view_name_b,
view_name_b,
self.__build_binary_list__(),
comments_b)
@ -589,7 +615,7 @@ cdef class Line_selection(list):
cdef register_view_class(bytes view_type_name,
type view_class):
type view_class):
'''
Each subclass of `dms.view` needs to be registered after its declaration
'''