Add a new column type for DNA sequences. For now, only sequences that can be
coded on 2 bits (i.e. containing only the characters 'ATGCatgc') are supported.
This commit is contained in:
@ -14,3 +14,5 @@
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiarray.h
|
||||
../../../src/obiarray.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
@ -47,6 +47,11 @@ from ._obidmscolumn_str cimport OBIDMS_column_str, \
|
||||
OBIDMS_column_str_multi_elts, \
|
||||
OBIDMS_column_str_multi_elts_writable
|
||||
|
||||
from ._obidmscolumn_seq cimport OBIDMS_column_seq, \
|
||||
OBIDMS_column_seq_writable, \
|
||||
OBIDMS_column_seq_multi_elts, \
|
||||
OBIDMS_column_seq_multi_elts_writable
|
||||
|
||||
|
||||
cdef class OBIDMS :
|
||||
|
||||
@ -215,6 +220,17 @@ cdef class OBIDMS :
|
||||
subclass = OBIDMS_column_str
|
||||
else :
|
||||
subclass = OBIDMS_column_str_multi_elts
|
||||
elif data_type == 6 :
|
||||
if (create or clone) :
|
||||
if nb_elements_per_line == 1 :
|
||||
subclass = OBIDMS_column_seq_writable
|
||||
else :
|
||||
subclass = OBIDMS_column_seq_multi_elts_writable
|
||||
else :
|
||||
if nb_elements_per_line == 1 :
|
||||
subclass = OBIDMS_column_seq
|
||||
else :
|
||||
subclass = OBIDMS_column_seq_multi_elts
|
||||
else :
|
||||
raise Exception("Problem with the data type")
|
||||
|
||||
@ -238,7 +254,7 @@ cdef class OBIDMS_column :
|
||||
bint create,
|
||||
bint clone, bint clone_data,
|
||||
obiversion_t version_number,
|
||||
OBIType_t type,
|
||||
OBIType_t type, # There's a problem with this with the OBI_IDX columns as there are 2 subtypes
|
||||
index_t nb_lines,
|
||||
index_t nb_elements_per_line,
|
||||
list elements_names,
|
||||
|
18
python/obitools3/obidms/_obidmscolumn_seq.cfiles
Normal file
18
python/obitools3/obidms/_obidmscolumn_seq.cfiles
Normal file
@ -0,0 +1,18 @@
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiarray.h
|
||||
../../../src/obiarray.c
|
25
python/obitools3/obidms/_obidmscolumn_seq.pxd
Normal file
25
python/obitools3/obidms/_obidmscolumn_seq.pxd
Normal file
@ -0,0 +1,25 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obitypes cimport index_t
|
||||
from ._obidms cimport OBIDMS_column
|
||||
|
||||
|
||||
# Declaration of the DNA sequence column class with one element per line.
# It extends OBIDMS_column; the method bodies live in _obidmscolumn_seq.pyx.
cdef class OBIDMS_column_seq(OBIDMS_column):
|
||||
# Read the value stored at line `line_nb`.
cpdef object get_line(self, index_t line_nb)
|
||||
# Store `value` at line `line_nb` (the base class implementation raises,
# because the column is read-only).
cpdef set_line(self, index_t line_nb, object value)
|
||||
# Close the column.
cpdef close(self)
|
||||
|
||||
# Declaration of the writable variant of OBIDMS_column_seq: it overrides
# set_line and close; the method bodies live in _obidmscolumn_seq.pyx.
cdef class OBIDMS_column_seq_writable(OBIDMS_column_seq):
|
||||
# Store `value` at line `line_nb`.
cpdef set_line(self, index_t line_nb, object value)
|
||||
# Close the column (the implementation truncates it before closing).
cpdef close(self)
|
||||
|
||||
# Declaration of the DNA sequence column class with several named elements
# per line; the method bodies live in _obidmscolumn_seq.pyx.
cdef class OBIDMS_column_seq_multi_elts(OBIDMS_column_seq):
|
||||
# Read the element called `element_name` at line `line_nb`.
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||
# Read all the elements of line `line_nb`.
cpdef object get_line(self, index_t line_nb)
|
||||
# Store `value` in the element called `element_name` at line `line_nb`
# (this class's implementation raises, because the column is read-only).
cpdef set_item(self, index_t line_nb, str element_name, str value)
|
||||
# Store several elements of line `line_nb` at once
# (this class's implementation raises, because the column is read-only).
cpdef set_line(self, index_t line_nb, object values)
|
||||
|
||||
# Declaration of the writable variant of OBIDMS_column_seq_multi_elts: it
# overrides set_item, set_line and close; the method bodies live in
# _obidmscolumn_seq.pyx.
cdef class OBIDMS_column_seq_multi_elts_writable(OBIDMS_column_seq_multi_elts):
|
||||
# Store `value` in the element called `element_name` at line `line_nb`.
cpdef set_item(self, index_t line_nb, str element_name, str value)
|
||||
# Store every element of `values` (indexed by element name) at line `line_nb`.
cpdef set_line(self, index_t line_nb, object values)
|
||||
# Close the column (the implementation truncates it before closing).
cpdef close(self)
|
103
python/obitools3/obidms/_obidmscolumn_seq.pyx
Normal file
103
python/obitools3/obidms/_obidmscolumn_seq.pyx
Normal file
@ -0,0 +1,103 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obidmscolumn cimport obi_close_column,\
|
||||
obi_truncate_and_close_column, \
|
||||
obi_column_get_obiseq_with_elt_name, \
|
||||
obi_column_get_obiseq_with_elt_idx, \
|
||||
obi_column_set_obiseq_with_elt_name, \
|
||||
obi_column_set_obiseq_with_elt_idx
|
||||
from .capi.obierrno cimport obi_errno
|
||||
from .capi.obitypes cimport OBIIdx_NA, const_char_p
|
||||
|
||||
from obitools3.utils cimport str2bytes, bytes2str
|
||||
|
||||
|
||||
cdef class OBIDMS_column_seq(OBIDMS_column):
    """
    Read-only DNA sequence column with a single element per line.
    """

    cpdef object get_line(self, index_t line_nb):
        """
        Return the sequence stored at line `line_nb`.

        The value is read at element index 0 and converted to `str` with
        `bytes2str`. `None` is returned when the value compares equal to
        `OBIIdx_NA`.

        Raises:
            IndexError: if `obi_errno` is positive after the read.
        """
        cdef const_char_p c_value
        cdef bytes value

        c_value = obi_column_get_obiseq_with_elt_idx(self.pointer, line_nb, 0)

        # Check the error status *before* coercing the C string to a Python
        # object: coercing a NULL `char*` to `bytes` is not safe.
        # NOTE(review): assumes the C getter may return NULL on error - confirm.
        if obi_errno > 0 :
            raise IndexError(line_nb)

        value = <bytes> c_value

        # NOTE(review): `value` is a bytes object while OBIIdx_NA is cimported
        # from capi.obitypes (declaration not visible here); if it is an
        # integer sentinel this test can never be true - confirm.
        if value == OBIIdx_NA :
            return None
        return bytes2str(value)

    cpdef set_line(self, index_t line_nb, object value):
        """
        Always raise: this class gives read-only access to the column.
        """
        raise Exception("Column is read-only")

    cpdef close(self):
        """
        Close the column with `obi_close_column`.

        Raises:
            Exception: if `obi_close_column` returns a negative value.
        """
        if obi_close_column(self.pointer) < 0 :
            raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_seq_writable(OBIDMS_column_seq):
    """
    Writable DNA sequence column with a single element per line.
    """

    cpdef set_line(self, index_t line_nb, object value):
        """
        Write `value` at line `line_nb` (element index 0).

        `value` is converted with `str2bytes` before being handed to the
        C layer.

        Raises:
            Exception: if the C setter returns a negative value.
        """
        encoded = str2bytes(value)
        status = obi_column_set_obiseq_with_elt_idx(self.pointer, line_nb, 0, encoded)
        if status < 0:
            raise Exception("Problem setting a value in a column")

    cpdef close(self):
        """
        Close the column with `obi_truncate_and_close_column`.

        Raises:
            Exception: if the C call returns a negative value.
        """
        status = obi_truncate_and_close_column(self.pointer)
        if status < 0 :
            raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_seq_multi_elts(OBIDMS_column_seq):
    """
    Read-only DNA sequence column with several named elements per line.
    """

    cpdef object get_item(self, index_t line_nb, str element_name):
        """
        Return the sequence stored in element `element_name` of line `line_nb`.

        The value is converted to `str` with `bytes2str`. `None` is returned
        when the value compares equal to `OBIIdx_NA`.

        Raises:
            IndexError: if `obi_errno` is positive after the read.
        """
        cdef const_char_p c_value
        cdef bytes value

        c_value = obi_column_get_obiseq_with_elt_name(self.pointer, line_nb, str2bytes(element_name))

        # Check the error status *before* coercing the C string to a Python
        # object: coercing a NULL `char*` to `bytes` is not safe.
        # NOTE(review): assumes the C getter may return NULL on error - confirm.
        if obi_errno > 0 :
            raise IndexError(line_nb, element_name)

        value = <bytes> c_value

        # NOTE(review): `value` is a bytes object while OBIIdx_NA is cimported
        # from capi.obitypes (declaration not visible here); if it is an
        # integer sentinel this test can never be true - confirm.
        if value == OBIIdx_NA :
            return None
        return bytes2str(value)

    cpdef object get_line(self, index_t line_nb) :
        """
        Return all the elements of line `line_nb`.

        The result is a dict mapping each name of `self.elements_names` to the
        corresponding value converted with `bytes2str`. `None` is returned
        instead when every value compares equal to `OBIIdx_NA`.

        Raises:
            IndexError: if `obi_errno` is positive after reading an element.
        """
        cdef const_char_p c_value
        cdef bytes value
        cdef object result
        cdef index_t i
        cdef bint all_NA

        result = {}
        all_NA = True
        for i in range(self.nb_elements_per_line) :
            c_value = obi_column_get_obiseq_with_elt_idx(self.pointer, line_nb, i)
            # Same ordering as in get_item: error check first, coercion after.
            if obi_errno > 0 :
                raise IndexError(line_nb)
            value = <bytes> c_value
            # NOTE(review): unlike get_item, an element equal to OBIIdx_NA is
            # stored as a converted string, not as None - confirm intended.
            result[self.elements_names[i]] = bytes2str(value)
            if all_NA and (value != OBIIdx_NA) :
                all_NA = False

        if all_NA :
            return None
        return result

    cpdef set_item(self, index_t line_nb, str element_name, str value):
        """
        Always raise: this class gives read-only access to the column.
        """
        raise Exception("Column is read-only")

    cpdef set_line(self, index_t line_nb, object values):
        """
        Always raise: this class gives read-only access to the column.
        """
        raise Exception("Column is read-only")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_seq_multi_elts_writable(OBIDMS_column_seq_multi_elts):
    """
    Writable DNA sequence column with several named elements per line.
    """

    cpdef set_item(self, index_t line_nb, str element_name, str value):
        """
        Write `value` in element `element_name` of line `line_nb`.

        Both the element name and the value are converted with `str2bytes`
        before being handed to the C layer.

        Raises:
            Exception: if the C setter returns a negative value.
        """
        status = obi_column_set_obiseq_with_elt_name(self.pointer, line_nb, str2bytes(element_name), str2bytes(value))
        if status < 0:
            raise Exception("Problem setting a value in a column")

    cpdef set_line(self, index_t line_nb, object values):
        """
        Write every element of `values` at line `line_nb`.

        `values` is iterated to obtain the element names and indexed by name
        to obtain the values; each pair is written with `set_item`.
        """
        cdef str sequence
        for name in values :
            sequence = values[name]
            self.set_item(line_nb, name, sequence)

    cpdef close(self):
        """
        Close the column with `obi_truncate_and_close_column`.

        Raises:
            Exception: if the C call returns a negative value.
        """
        status = obi_truncate_and_close_column(self.pointer)
        if status < 0 :
            raise Exception("Problem closing a column")
|
||||
|
||||
|
@ -163,10 +163,30 @@ cdef extern from "obidmscolumn_str.h" nogil:
|
||||
char* value)
|
||||
|
||||
const_char_p obi_column_get_obistr_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
const_char_p obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
# C functions declared in obidmscolumn_seq.h for reading and writing DNA
# sequence values in a column. The whole block is declared `nogil`.
# The two setters return an int; the Cython wrappers in _obidmscolumn_seq.pyx
# treat a negative return value as a failure. The two getters return a
# const_char_p; the wrappers check obi_errno after calling them.
cdef extern from "obidmscolumn_seq.h" nogil:
|
||||
|
||||
# Set the value of the element called `element_name` at line `line_nb`.
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
char* value)
|
||||
|
||||
# Set the value of the element at index `element_idx` at line `line_nb`.
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
char* value)
|
||||
|
||||
# Get the value of the element called `element_name` at line `line_nb`.
const_char_p obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
# Get the value of the element at index `element_idx` at line `line_nb`.
const_char_p obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
|
@ -10,9 +10,9 @@ from obitools3.obidms._obidms import OBIDMS
|
||||
|
||||
LINE_COUNT_FOR_TEST_COLUMN = 10000 # TODO randomize?
|
||||
SMALLER_LINE_COUNT_FOR_TEST_COLUMN = 1000 # TODO randomize?
|
||||
NB_ELEMENTS_PER_LINE = 20 # TODO randomize?
|
||||
NB_ELEMENTS_PER_LINE = 10 # TODO randomize?
|
||||
DMS_NAME = "unit_test_dms"
|
||||
DATA_TYPES = ['OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_IDX']
|
||||
DATA_TYPES = ['OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_STR', 'OBI_SEQ']
|
||||
|
||||
|
||||
def create_test_obidms():
|
||||
@ -58,12 +58,15 @@ def random_obivalue(data_type):
|
||||
elif data_type == "OBI_BOOL" :
|
||||
return randint(0,1)
|
||||
elif data_type == "OBI_CHAR" :
|
||||
nucs = 'atgc'
|
||||
return nucs[randint(0,3)]
|
||||
elif data_type == "OBI_IDX" :
|
||||
length = randint(1,500)
|
||||
return choice(string.ascii_lowercase)
|
||||
elif data_type == "OBI_STR" :
|
||||
length = randint(1,200)
|
||||
randoms = ''.join(choice(string.ascii_lowercase) for i in range(length))
|
||||
return randoms
|
||||
elif data_type == "OBI_SEQ" :
|
||||
length = randint(1,200)
|
||||
randoms = ''.join(choice("atgc") for i in range(length))
|
||||
return randoms
|
||||
|
||||
class OBIDMS_Column_TestCase(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
@ -255,6 +258,30 @@ class OBIDMS_Column_OBI_STR_multiple_elements_TestCase(OBIDMS_Column_multiple_el
|
||||
self.data_type_code,
|
||||
multiple_elements_per_line=True)
|
||||
|
||||
class OBIDMS_Column_OBI_SEQ_TestCase(OBIDMS_Column_TestCase):
    """Column test case set up with data type code 6 and one element per line."""

    def setUp(self):
        """Create a test DMS, then a test column of data type code 6 in it."""
        self.data_type_code = 6

        dms_info = create_test_obidms()
        self.dms, self.dms_name, self.dms_dir_name = dms_info

        column_info = create_test_column(self.dms, self.data_type_code)
        self.col, self.col_name, self.data_type_str = column_info
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
    """Column test case set up with data type code 6 and several elements per line."""

    def setUp(self):
        """Create a test DMS, then a multi-element test column of data type code 6."""
        self.data_type_code = 6

        dms_info = create_test_obidms()
        self.dms, self.dms_name, self.dms_dir_name = dms_info

        column_info = create_test_column(self.dms,
                                         self.data_type_code,
                                         multiple_elements_per_line=True)
        # The multi-element variant also returns the element names.
        self.col, self.col_name, self.elts_names, self.data_type_str = column_info
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(verbosity=2, defaultTest=["OBIDMS_Column_OBI_INT_TestCase",
|
||||
@ -266,6 +293,8 @@ if __name__ == '__main__':
|
||||
"OBIDMS_Column_OBI_CHAR_TestCase",
|
||||
"OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase",
|
||||
"OBIDMS_Column_OBI_STR_TestCase",
|
||||
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase"])
|
||||
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase",
|
||||
"OBIDMS_Column_OBI_SEQ_TestCase",
|
||||
"OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase"])
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user