New column type for DNA sequences. Only for those coded on 2 bits (only

'ATGCatgc') for now.
This commit is contained in:
Celine Mercier
2015-11-19 18:12:48 +01:00
parent e371248567
commit 6ab1c83302
17 changed files with 860 additions and 29 deletions

View File

@ -14,3 +14,5 @@
../../../src/private_at_functions.c
../../../src/obiarray.h
../../../src/obiarray.c
../../../src/encode.h
../../../src/encode.c

View File

@ -47,6 +47,11 @@ from ._obidmscolumn_str cimport OBIDMS_column_str, \
OBIDMS_column_str_multi_elts, \
OBIDMS_column_str_multi_elts_writable
from ._obidmscolumn_seq cimport OBIDMS_column_seq, \
OBIDMS_column_seq_writable, \
OBIDMS_column_seq_multi_elts, \
OBIDMS_column_seq_multi_elts_writable
cdef class OBIDMS :
@ -215,6 +220,17 @@ cdef class OBIDMS :
subclass = OBIDMS_column_str
else :
subclass = OBIDMS_column_str_multi_elts
elif data_type == 6 :
if (create or clone) :
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_seq_writable
else :
subclass = OBIDMS_column_seq_multi_elts_writable
else :
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_seq
else :
subclass = OBIDMS_column_seq_multi_elts
else :
raise Exception("Problem with the data type")
@ -238,7 +254,7 @@ cdef class OBIDMS_column :
bint create,
bint clone, bint clone_data,
obiversion_t version_number,
OBIType_t type,
OBIType_t type, # There's a problem with this with the OBI_IDX columns as there are 2 subtypes
index_t nb_lines,
index_t nb_elements_per_line,
list elements_names,

View File

@ -0,0 +1,18 @@
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiarray.h
../../../src/obiarray.c

View File

@ -0,0 +1,25 @@
#cython: language_level=3
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column
cdef class OBIDMS_column_seq(OBIDMS_column):
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_seq_writable(OBIDMS_column_seq):
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_seq_multi_elts(OBIDMS_column_seq):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, str value)
cpdef set_line(self, index_t line_nb, object values)
cdef class OBIDMS_column_seq_multi_elts_writable(OBIDMS_column_seq_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, str value)
cpdef set_line(self, index_t line_nb, object values)
cpdef close(self)

View File

@ -0,0 +1,103 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obiseq_with_elt_name, \
obi_column_get_obiseq_with_elt_idx, \
obi_column_set_obiseq_with_elt_name, \
obi_column_set_obiseq_with_elt_idx
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIIdx_NA, const_char_p
from obitools3.utils cimport str2bytes, bytes2str
cdef class OBIDMS_column_seq(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef bytes value
cdef object result
value = <bytes> obi_column_get_obiseq_with_elt_idx(self.pointer, line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIIdx_NA :
result = None
else :
result = bytes2str(value)
return result
cpdef set_line(self, index_t line_nb, object value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_seq_writable(OBIDMS_column_seq):
cpdef set_line(self, index_t line_nb, object value):
if obi_column_set_obiseq_with_elt_idx(self.pointer, line_nb, 0, str2bytes(value)) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_seq_multi_elts(OBIDMS_column_seq):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef bytes value
cdef object result
value = <bytes> obi_column_get_obiseq_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIIdx_NA :
result = None
else :
result = bytes2str(value)
return result
cpdef object get_line(self, index_t line_nb) :
cdef bytes value
cdef object result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = <bytes> obi_column_get_obiseq_with_elt_idx(self.pointer, line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
result[self.elements_names[i]] = bytes2str(value)
if all_NA and (value != OBIIdx_NA) :
all_NA = False
if all_NA :
result = None
return result
cpdef set_item(self, index_t line_nb, str element_name, str value):
raise Exception("Column is read-only")
cpdef set_line(self, index_t line_nb, object values):
raise Exception("Column is read-only")
cdef class OBIDMS_column_seq_multi_elts_writable(OBIDMS_column_seq_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, str value):
if obi_column_set_obiseq_with_elt_name(self.pointer, line_nb, str2bytes(element_name), str2bytes(value)) < 0:
raise Exception("Problem setting a value in a column")
cpdef set_line(self, index_t line_nb, object values):
cdef str value
for element_name in values :
value = values[element_name]
self.set_item(line_nb, element_name, value)
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -163,10 +163,30 @@ cdef extern from "obidmscolumn_str.h" nogil:
char* value)
const_char_p obi_column_get_obistr_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
index_t line_nb,
const_char_p element_name)
const_char_p obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
index_t line_nb,
index_t element_idx)
cdef extern from "obidmscolumn_seq.h" nogil:
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
char* value)
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
char* value)
const_char_p obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
const_char_p obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)

View File

@ -10,9 +10,9 @@ from obitools3.obidms._obidms import OBIDMS
LINE_COUNT_FOR_TEST_COLUMN = 10000 # TODO randomize?
SMALLER_LINE_COUNT_FOR_TEST_COLUMN = 1000 # TODO randomize?
NB_ELEMENTS_PER_LINE = 20 # TODO randomize?
NB_ELEMENTS_PER_LINE = 10 # TODO randomize?
DMS_NAME = "unit_test_dms"
DATA_TYPES = ['OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_IDX']
DATA_TYPES = ['OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_STR', 'OBI_SEQ']
def create_test_obidms():
@ -58,12 +58,15 @@ def random_obivalue(data_type):
elif data_type == "OBI_BOOL" :
return randint(0,1)
elif data_type == "OBI_CHAR" :
nucs = 'atgc'
return nucs[randint(0,3)]
elif data_type == "OBI_IDX" :
length = randint(1,500)
return choice(string.ascii_lowercase)
elif data_type == "OBI_STR" :
length = randint(1,200)
randoms = ''.join(choice(string.ascii_lowercase) for i in range(length))
return randoms
elif data_type == "OBI_SEQ" :
length = randint(1,200)
randoms = ''.join(choice("atgc") for i in range(length))
return randoms
class OBIDMS_Column_TestCase(unittest.TestCase):
def tearDown(self):
@ -255,6 +258,30 @@ class OBIDMS_Column_OBI_STR_multiple_elements_TestCase(OBIDMS_Column_multiple_el
self.data_type_code,
multiple_elements_per_line=True)
class OBIDMS_Column_OBI_SEQ_TestCase(OBIDMS_Column_TestCase):
def setUp(self):
self.data_type_code = 6
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code)
class OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
def setUp(self):
self.data_type_code = 6
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.elts_names, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code,
multiple_elements_per_line=True)
if __name__ == '__main__':
unittest.main(verbosity=2, defaultTest=["OBIDMS_Column_OBI_INT_TestCase",
@ -266,6 +293,8 @@ if __name__ == '__main__':
"OBIDMS_Column_OBI_CHAR_TestCase",
"OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase",
"OBIDMS_Column_OBI_STR_TestCase",
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase"])
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase",
"OBIDMS_Column_OBI_SEQ_TestCase",
"OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase"])