New column type for DNA sequences. Only for those coded on 2 bits (only
'ATGCatgc') for now.
This commit is contained in:
@ -14,3 +14,5 @@
|
|||||||
../../../src/private_at_functions.c
|
../../../src/private_at_functions.c
|
||||||
../../../src/obiarray.h
|
../../../src/obiarray.h
|
||||||
../../../src/obiarray.c
|
../../../src/obiarray.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/encode.c
|
@ -47,6 +47,11 @@ from ._obidmscolumn_str cimport OBIDMS_column_str, \
|
|||||||
OBIDMS_column_str_multi_elts, \
|
OBIDMS_column_str_multi_elts, \
|
||||||
OBIDMS_column_str_multi_elts_writable
|
OBIDMS_column_str_multi_elts_writable
|
||||||
|
|
||||||
|
from ._obidmscolumn_seq cimport OBIDMS_column_seq, \
|
||||||
|
OBIDMS_column_seq_writable, \
|
||||||
|
OBIDMS_column_seq_multi_elts, \
|
||||||
|
OBIDMS_column_seq_multi_elts_writable
|
||||||
|
|
||||||
|
|
||||||
cdef class OBIDMS :
|
cdef class OBIDMS :
|
||||||
|
|
||||||
@ -215,6 +220,17 @@ cdef class OBIDMS :
|
|||||||
subclass = OBIDMS_column_str
|
subclass = OBIDMS_column_str
|
||||||
else :
|
else :
|
||||||
subclass = OBIDMS_column_str_multi_elts
|
subclass = OBIDMS_column_str_multi_elts
|
||||||
|
elif data_type == 6 :
|
||||||
|
if (create or clone) :
|
||||||
|
if nb_elements_per_line == 1 :
|
||||||
|
subclass = OBIDMS_column_seq_writable
|
||||||
|
else :
|
||||||
|
subclass = OBIDMS_column_seq_multi_elts_writable
|
||||||
|
else :
|
||||||
|
if nb_elements_per_line == 1 :
|
||||||
|
subclass = OBIDMS_column_seq
|
||||||
|
else :
|
||||||
|
subclass = OBIDMS_column_seq_multi_elts
|
||||||
else :
|
else :
|
||||||
raise Exception("Problem with the data type")
|
raise Exception("Problem with the data type")
|
||||||
|
|
||||||
@ -238,7 +254,7 @@ cdef class OBIDMS_column :
|
|||||||
bint create,
|
bint create,
|
||||||
bint clone, bint clone_data,
|
bint clone, bint clone_data,
|
||||||
obiversion_t version_number,
|
obiversion_t version_number,
|
||||||
OBIType_t type,
|
OBIType_t type, # There's a problem with this with the OBI_IDX columns as there are 2 subtypes
|
||||||
index_t nb_lines,
|
index_t nb_lines,
|
||||||
index_t nb_elements_per_line,
|
index_t nb_elements_per_line,
|
||||||
list elements_names,
|
list elements_names,
|
||||||
|
18
python/obitools3/obidms/_obidmscolumn_seq.cfiles
Normal file
18
python/obitools3/obidms/_obidmscolumn_seq.cfiles
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/private_at_functions.h
|
||||||
|
../../../src/private_at_functions.c
|
||||||
|
../../../src/obiarray.h
|
||||||
|
../../../src/obiarray.c
|
25
python/obitools3/obidms/_obidmscolumn_seq.pxd
Normal file
25
python/obitools3/obidms/_obidmscolumn_seq.pxd
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from .capi.obitypes cimport index_t
|
||||||
|
from ._obidms cimport OBIDMS_column
|
||||||
|
|
||||||
|
|
||||||
|
cdef class OBIDMS_column_seq(OBIDMS_column):
|
||||||
|
cpdef object get_line(self, index_t line_nb)
|
||||||
|
cpdef set_line(self, index_t line_nb, object value)
|
||||||
|
cpdef close(self)
|
||||||
|
|
||||||
|
cdef class OBIDMS_column_seq_writable(OBIDMS_column_seq):
|
||||||
|
cpdef set_line(self, index_t line_nb, object value)
|
||||||
|
cpdef close(self)
|
||||||
|
|
||||||
|
cdef class OBIDMS_column_seq_multi_elts(OBIDMS_column_seq):
|
||||||
|
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||||
|
cpdef object get_line(self, index_t line_nb)
|
||||||
|
cpdef set_item(self, index_t line_nb, str element_name, str value)
|
||||||
|
cpdef set_line(self, index_t line_nb, object values)
|
||||||
|
|
||||||
|
cdef class OBIDMS_column_seq_multi_elts_writable(OBIDMS_column_seq_multi_elts):
|
||||||
|
cpdef set_item(self, index_t line_nb, str element_name, str value)
|
||||||
|
cpdef set_line(self, index_t line_nb, object values)
|
||||||
|
cpdef close(self)
|
103
python/obitools3/obidms/_obidmscolumn_seq.pyx
Normal file
103
python/obitools3/obidms/_obidmscolumn_seq.pyx
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from .capi.obidmscolumn cimport obi_close_column,\
|
||||||
|
obi_truncate_and_close_column, \
|
||||||
|
obi_column_get_obiseq_with_elt_name, \
|
||||||
|
obi_column_get_obiseq_with_elt_idx, \
|
||||||
|
obi_column_set_obiseq_with_elt_name, \
|
||||||
|
obi_column_set_obiseq_with_elt_idx
|
||||||
|
from .capi.obierrno cimport obi_errno
|
||||||
|
from .capi.obitypes cimport OBIIdx_NA, const_char_p
|
||||||
|
|
||||||
|
from obitools3.utils cimport str2bytes, bytes2str
|
||||||
|
|
||||||
|
|
||||||
|
cdef class OBIDMS_column_seq(OBIDMS_column):
    """Read-only view on a column holding one DNA sequence per line."""

    cpdef object get_line(self, index_t line_nb):
        """Return the sequence stored at line `line_nb` as a str.

        Returns None when the retrieved value compares equal to OBIIdx_NA.
        Raises IndexError(line_nb) if obi_errno is set after the C call.
        """
        cdef bytes raw_seq
        # The single value of a line is always element 0
        raw_seq = <bytes> obi_column_get_obiseq_with_elt_idx(self.pointer, line_nb, 0)
        if obi_errno > 0 :
            raise IndexError(line_nb)
        if raw_seq == OBIIdx_NA :
            return None
        return bytes2str(raw_seq)

    cpdef set_line(self, index_t line_nb, object value):
        """Always raise: this class does not allow writing."""
        raise Exception("Column is read-only")

    cpdef close(self):
        """Close the underlying column, raising if the C call reports an error."""
        if obi_close_column(self.pointer) < 0 :
            raise Exception("Problem closing a column")
|
||||||
|
|
||||||
|
|
||||||
|
cdef class OBIDMS_column_seq_writable(OBIDMS_column_seq):
    """Writable variant of OBIDMS_column_seq (one DNA sequence per line)."""

    cpdef set_line(self, index_t line_nb, object value):
        """Store `value` (converted with str2bytes) at line `line_nb`.

        Raises Exception if the C setter returns a negative status.
        """
        cdef int status
        # The single value of a line is always element 0
        status = obi_column_set_obiseq_with_elt_idx(self.pointer, line_nb, 0, str2bytes(value))
        if status < 0 :
            raise Exception("Problem setting a value in a column")

    cpdef close(self):
        """Truncate and close the underlying column, raising on a negative status."""
        cdef int status
        status = obi_truncate_and_close_column(self.pointer)
        if status < 0 :
            raise Exception("Problem closing a column")
|
||||||
|
|
||||||
|
|
||||||
|
cdef class OBIDMS_column_seq_multi_elts(OBIDMS_column_seq):
    """Read-only view on a column holding several named DNA sequences per line."""

    cpdef object get_item(self, index_t line_nb, str element_name):
        """Return the sequence of element `element_name` at line `line_nb`.

        Returns None when the retrieved value compares equal to OBIIdx_NA.
        Raises IndexError(line_nb, element_name) if obi_errno is set after
        the C call.
        """
        cdef bytes raw_seq
        raw_seq = <bytes> obi_column_get_obiseq_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
        if obi_errno > 0 :
            raise IndexError(line_nb, element_name)
        if raw_seq == OBIIdx_NA :
            return None
        return bytes2str(raw_seq)

    cpdef object get_line(self, index_t line_nb) :
        """Return a dict mapping each element name to its sequence for a line.

        Returns None instead of the dict when every retrieved value compares
        equal to OBIIdx_NA. Raises IndexError(line_nb) if obi_errno is set
        after any C call.
        """
        cdef bytes   raw_seq
        cdef object  line
        cdef index_t elt_idx
        cdef bint    found_value
        line = {}
        found_value = False
        for elt_idx in range(self.nb_elements_per_line) :
            raw_seq = <bytes> obi_column_get_obiseq_with_elt_idx(self.pointer, line_nb, elt_idx)
            if obi_errno > 0 :
                raise IndexError(line_nb)
            line[self.elements_names[elt_idx]] = bytes2str(raw_seq)
            # Remember whether at least one element is not NA
            if raw_seq != OBIIdx_NA :
                found_value = True
        if not found_value :
            return None
        return line

    cpdef set_item(self, index_t line_nb, str element_name, str value):
        """Always raise: this class does not allow writing."""
        raise Exception("Column is read-only")

    cpdef set_line(self, index_t line_nb, object values):
        """Always raise: this class does not allow writing."""
        raise Exception("Column is read-only")
|
||||||
|
|
||||||
|
|
||||||
|
cdef class OBIDMS_column_seq_multi_elts_writable(OBIDMS_column_seq_multi_elts):
    """Writable variant of OBIDMS_column_seq_multi_elts."""

    cpdef set_item(self, index_t line_nb, str element_name, str value):
        """Store `value` for element `element_name` at line `line_nb`.

        Raises Exception if the C setter returns a negative status.
        """
        cdef int status
        status = obi_column_set_obiseq_with_elt_name(self.pointer, line_nb, str2bytes(element_name), str2bytes(value))
        if status < 0 :
            raise Exception("Problem setting a value in a column")

    cpdef set_line(self, index_t line_nb, object values):
        """Store every (element name -> sequence) pair of `values` at a line.

        `values` is iterated for its keys and indexed by each key; each value
        is written with set_item().
        """
        cdef str seq
        for elt_name in values :
            seq = values[elt_name]
            self.set_item(line_nb, elt_name, seq)

    cpdef close(self):
        """Truncate and close the underlying column, raising on a negative status."""
        cdef int status
        status = obi_truncate_and_close_column(self.pointer)
        if status < 0 :
            raise Exception("Problem closing a column")
|
||||||
|
|
||||||
|
|
@ -163,10 +163,30 @@ cdef extern from "obidmscolumn_str.h" nogil:
|
|||||||
char* value)
|
char* value)
|
||||||
|
|
||||||
const_char_p obi_column_get_obistr_with_elt_name(OBIDMS_column_p column,
|
const_char_p obi_column_get_obistr_with_elt_name(OBIDMS_column_p column,
|
||||||
index_t line_nb,
|
index_t line_nb,
|
||||||
const_char_p element_name)
|
const_char_p element_name)
|
||||||
|
|
||||||
const_char_p obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column,
|
const_char_p obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column,
|
||||||
index_t line_nb,
|
index_t line_nb,
|
||||||
index_t element_idx)
|
index_t element_idx)
|
||||||
|
|
||||||
|
cdef extern from "obidmscolumn_seq.h" nogil:
|
||||||
|
|
||||||
|
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column,
|
||||||
|
index_t line_nb,
|
||||||
|
const_char_p element_name,
|
||||||
|
char* value)
|
||||||
|
|
||||||
|
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column,
|
||||||
|
index_t line_nb,
|
||||||
|
index_t element_idx,
|
||||||
|
char* value)
|
||||||
|
|
||||||
|
const_char_p obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column,
|
||||||
|
index_t line_nb,
|
||||||
|
const_char_p element_name)
|
||||||
|
|
||||||
|
const_char_p obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column,
|
||||||
|
index_t line_nb,
|
||||||
|
index_t element_idx)
|
||||||
|
|
||||||
|
@ -10,9 +10,9 @@ from obitools3.obidms._obidms import OBIDMS
|
|||||||
|
|
||||||
LINE_COUNT_FOR_TEST_COLUMN = 10000 # TODO randomize?
|
LINE_COUNT_FOR_TEST_COLUMN = 10000 # TODO randomize?
|
||||||
SMALLER_LINE_COUNT_FOR_TEST_COLUMN = 1000 # TODO randomize?
|
SMALLER_LINE_COUNT_FOR_TEST_COLUMN = 1000 # TODO randomize?
|
||||||
NB_ELEMENTS_PER_LINE = 20 # TODO randomize?
|
NB_ELEMENTS_PER_LINE = 10 # TODO randomize?
|
||||||
DMS_NAME = "unit_test_dms"
|
DMS_NAME = "unit_test_dms"
|
||||||
DATA_TYPES = ['OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_IDX']
|
DATA_TYPES = ['OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_STR', 'OBI_SEQ']
|
||||||
|
|
||||||
|
|
||||||
def create_test_obidms():
|
def create_test_obidms():
|
||||||
@ -58,12 +58,15 @@ def random_obivalue(data_type):
|
|||||||
elif data_type == "OBI_BOOL" :
|
elif data_type == "OBI_BOOL" :
|
||||||
return randint(0,1)
|
return randint(0,1)
|
||||||
elif data_type == "OBI_CHAR" :
|
elif data_type == "OBI_CHAR" :
|
||||||
nucs = 'atgc'
|
return choice(string.ascii_lowercase)
|
||||||
return nucs[randint(0,3)]
|
elif data_type == "OBI_STR" :
|
||||||
elif data_type == "OBI_IDX" :
|
length = randint(1,200)
|
||||||
length = randint(1,500)
|
|
||||||
randoms = ''.join(choice(string.ascii_lowercase) for i in range(length))
|
randoms = ''.join(choice(string.ascii_lowercase) for i in range(length))
|
||||||
return randoms
|
return randoms
|
||||||
|
elif data_type == "OBI_SEQ" :
|
||||||
|
length = randint(1,200)
|
||||||
|
randoms = ''.join(choice("atgc") for i in range(length))
|
||||||
|
return randoms
|
||||||
|
|
||||||
class OBIDMS_Column_TestCase(unittest.TestCase):
|
class OBIDMS_Column_TestCase(unittest.TestCase):
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
@ -255,6 +258,30 @@ class OBIDMS_Column_OBI_STR_multiple_elements_TestCase(OBIDMS_Column_multiple_el
|
|||||||
self.data_type_code,
|
self.data_type_code,
|
||||||
multiple_elements_per_line=True)
|
multiple_elements_per_line=True)
|
||||||
|
|
||||||
|
class OBIDMS_Column_OBI_SEQ_TestCase(OBIDMS_Column_TestCase):
    """Run the generic single-element column tests with data type code 6."""

    def setUp(self):
        """Create a fresh test DMS and a test column of data type code 6."""
        self.data_type_code = 6
        (self.dms,
         self.dms_name,
         self.dms_dir_name) = create_test_obidms()
        (self.col,
         self.col_name,
         self.data_type_str) = create_test_column(self.dms,
                                                  self.data_type_code)
|
||||||
|
|
||||||
|
|
||||||
|
class OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
    """Run the generic multiple-elements column tests with data type code 6."""

    def setUp(self):
        """Create a fresh test DMS and a multi-element test column of data type code 6."""
        self.data_type_code = 6
        (self.dms,
         self.dms_name,
         self.dms_dir_name) = create_test_obidms()
        (self.col,
         self.col_name,
         self.elts_names,
         self.data_type_str) = create_test_column(self.dms,
                                                  self.data_type_code,
                                                  multiple_elements_per_line=True)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main(verbosity=2, defaultTest=["OBIDMS_Column_OBI_INT_TestCase",
|
unittest.main(verbosity=2, defaultTest=["OBIDMS_Column_OBI_INT_TestCase",
|
||||||
@ -266,6 +293,8 @@ if __name__ == '__main__':
|
|||||||
"OBIDMS_Column_OBI_CHAR_TestCase",
|
"OBIDMS_Column_OBI_CHAR_TestCase",
|
||||||
"OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase",
|
"OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase",
|
||||||
"OBIDMS_Column_OBI_STR_TestCase",
|
"OBIDMS_Column_OBI_STR_TestCase",
|
||||||
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase"])
|
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase",
|
||||||
|
"OBIDMS_Column_OBI_SEQ_TestCase",
|
||||||
|
"OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase"])
|
||||||
|
|
||||||
|
|
||||||
|
180
src/encode.c
Normal file
180
src/encode.c
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* Encoding functions *
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file encode.c
|
||||||
|
* @author Celine Mercier
|
||||||
|
* @date November 18th 2015
|
||||||
|
* @brief Functions encoding DNA sequences.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "encode.h"
|
||||||
|
#include "obiarray.h"
|
||||||
|
#include "obidebug.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: endianness problem?
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
bool only_ATGC(char* seq)
{
    char* cursor;

    // Walk the string up to its terminating '\0' and stop at the first
    // character that is not one of 'atgcATGC'
    for (cursor = seq; *cursor != '\0'; cursor++)
    {
        switch (*cursor)
        {
            case 'A': case 'T': case 'G': case 'C':
            case 'a': case 't': case 'g': case 'c':
                break;
            default:
                return 0;
        }
    }

    return 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
byte_t* encode_seq_on_2_bits(char* seq, int32_t length) // TODO shift = 2
|
||||||
|
{
|
||||||
|
byte_t* seq_b;
|
||||||
|
uint8_t shift;
|
||||||
|
int32_t length_b;
|
||||||
|
int32_t i;
|
||||||
|
|
||||||
|
// fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>Encoding sequence %s", seq);
|
||||||
|
|
||||||
|
length_b = ceil((double) length / (double) 4.0);
|
||||||
|
|
||||||
|
// fprintf(stderr, "\nLength: %d", length_b);
|
||||||
|
|
||||||
|
seq_b = (byte_t*) malloc(length_b * sizeof(byte_t));
|
||||||
|
|
||||||
|
memset(seq_b, 0, length_b);
|
||||||
|
|
||||||
|
for (i=0; i<length; i++)
|
||||||
|
{
|
||||||
|
shift = 6 - 2*(i%4);
|
||||||
|
// fprintf(stderr, "\nshift: %u", shift);
|
||||||
|
|
||||||
|
switch (seq[i])
|
||||||
|
{
|
||||||
|
case 'a':
|
||||||
|
case 'A':
|
||||||
|
seq_b[i/4] |= NUC_A << shift;
|
||||||
|
// fprintf(stderr, "\nIn byte %d, writing A:", i/4);
|
||||||
|
// print_bits(seq_b, length_b);
|
||||||
|
break;
|
||||||
|
case 'c':
|
||||||
|
case 'C':
|
||||||
|
seq_b[i/4] |= NUC_C << shift;
|
||||||
|
// fprintf(stderr, "\nIn byte %d, writing C:", i/4);
|
||||||
|
// print_bits(seq_b, length_b);
|
||||||
|
break;
|
||||||
|
case 'g':
|
||||||
|
case 'G':
|
||||||
|
seq_b[i/4] |= NUC_G << shift;
|
||||||
|
// fprintf(stderr, "\nIn byte %d, writing G:", i/4);
|
||||||
|
// print_bits(seq_b, length_b);
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
case 'T':
|
||||||
|
seq_b[i/4] |= NUC_T << shift;
|
||||||
|
// fprintf(stderr, "\nIn byte %d, writing T:", i/4);
|
||||||
|
// print_bits(seq_b, length_b);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
obidebug(1, "\nInvalid nucleotide base when encoding (not [atgcATGC])");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fprintf(stderr, "\n>>>>>>>>>Encoded:");
|
||||||
|
// print_bits(seq_b, length_b);
|
||||||
|
|
||||||
|
return seq_b;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
{
    char*    seq;     // decoded sequence, '\0'-terminated
    int32_t  i;
    uint8_t  shift;   // position of the current nucleotide in its byte
    uint8_t  mask;    // NUC_MASK moved onto the current nucleotide
    uint8_t  nuc;     // 2-bits code of the current nucleotide

    if ((seq_b == NULL) || (length_seq < 0))
    {
        obidebug(1, "\nInvalid arguments when decoding a sequence coded on 2 bits");
        return NULL;
    }

    seq = (char*) malloc(((size_t) length_seq + 1) * sizeof(char));
    if (seq == NULL)
    {
        obidebug(1, "\nError allocating memory for a decoded sequence");
        return NULL;
    }

    for (i=0; i<length_seq; i++)
    {
        // The first nucleotide of each group of 4 is in the 2 highest bits
        shift = 6 - 2*(i % 4);
        mask = NUC_MASK << shift;
        nuc = (seq_b[i/4] & mask) >> shift;

        switch (nuc)
        {
            case NUC_A:
                seq[i] = 'a';
                break;
            case NUC_C:
                seq[i] = 'c';
                break;
            case NUC_G:
                seq[i] = 'g';
                break;
            case NUC_T:
                seq[i] = 't';
                break;
            default:
                obidebug(1, "\nInvalid nucleotide base when decoding");
                free(seq);    // don't leak the partially decoded buffer
                return NULL;
        }
    }

    seq[length_seq] = '\0';

    return seq;
}
|
||||||
|
|
||||||
|
|
||||||
|
////////// FOR DEBUGGING ///////////
|
||||||
|
|
||||||
|
// little endian
|
||||||
|
void print_bits(void* ptr, int32_t size)
{
    const uint8_t* bytes = (const uint8_t*) ptr;
    int32_t byte_idx;
    int32_t bit_idx;

    fprintf(stderr, "\n");

    // Each of the `size` bytes is written to stderr as 8 digits, highest
    // bit first, followed by a space
    for (byte_idx = 0; byte_idx < size; byte_idx++)
    {
        bit_idx = 8;
        while (bit_idx-- > 0)
            fprintf(stderr, "%u", (unsigned int) ((bytes[byte_idx] >> bit_idx) & 1));
        fprintf(stderr, " ");
    }

    fprintf(stderr, "\n");
}
|
95
src/encode.h
Normal file
95
src/encode.h
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* Encoding header file *
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file encode.h
|
||||||
|
* @author Celine Mercier
|
||||||
|
* @date November 18th 2015
|
||||||
|
* @brief Header file for encoding DNA sequences.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
#include "obiarray.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define NUC_MASK 0x3 /**< Binary: 11 to use when decoding */
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief enum for the 2-bits codes for each of the 4 nucleotides.
|
||||||
|
*/
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
NUC_A = 0x0, /* binary: 00 */
|
||||||
|
NUC_C = 0x1, /* binary: 01 */
|
||||||
|
NUC_G = 0x2, /* binary: 10 */
|
||||||
|
NUC_T = 0x3, /* binary: 11 */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Checks if there are only 'atgcATGC' characters in a
|
||||||
|
* character string.
|
||||||
|
*
|
||||||
|
* @param seq The sequence to check.
|
||||||
|
*
|
||||||
|
* @returns A boolean value indicating if there are only
|
||||||
|
* 'atgcATGC' characters in a character string.
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
bool only_ATGC(char* seq);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
|
||||||
|
*
|
||||||
|
* A or a : 00
|
||||||
|
* C or c : 01
|
||||||
|
* G or g : 10
|
||||||
|
* T or t : 11
|
||||||
|
*
|
||||||
|
* @warning The DNA sequence must contain only 'atgcATGC' characters.
|
||||||
|
*
|
||||||
|
* @param seq The sequence to encode.
|
||||||
|
* @param length The length of the sequence to encode.
|
||||||
|
*
|
||||||
|
* @returns The encoded sequence.
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
byte_t* encode_seq_on_2_bits(char* seq, int32_t length);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Decodes a DNA sequence that is coded with each nucleotide on 2 bits.
|
||||||
|
*
|
||||||
|
* A or a : 00
|
||||||
|
* C or c : 01
|
||||||
|
* G or g : 10
|
||||||
|
* T or t : 11
|
||||||
|
*
|
||||||
|
* @param seq_b The encoded sequence to decode.
|
||||||
|
* @param length_seq The initial length of the sequence before it was encoded.
|
||||||
|
*
|
||||||
|
* @returns The decoded sequence ended with '\0'.
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
|
||||||
|
|
||||||
|
|
||||||
|
////////// FOR DEBUGGING ///////////
|
||||||
|
|
||||||
|
// little endian
|
||||||
|
void print_bits(void* ptr, int32_t length);
|
||||||
|
|
@ -24,6 +24,7 @@
|
|||||||
#include "obitypes.h"
|
#include "obitypes.h"
|
||||||
#include "obidebug.h"
|
#include "obidebug.h"
|
||||||
#include "private_at_functions.h"
|
#include "private_at_functions.h"
|
||||||
|
#include "encode.h"
|
||||||
|
|
||||||
|
|
||||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||||
@ -446,6 +447,8 @@ int array_compare(byte_t* value_1, byte_t* value_2)
|
|||||||
uint8_t size_2;
|
uint8_t size_2;
|
||||||
int32_t len_1;
|
int32_t len_1;
|
||||||
int32_t len_2;
|
int32_t len_2;
|
||||||
|
int32_t ini_len_1;
|
||||||
|
int32_t ini_len_2;
|
||||||
int32_t b;
|
int32_t b;
|
||||||
|
|
||||||
//obidebug(1, "\nCOMPARING 1=%d,%.*s; 2=%d,%.*s", *((int32_t*)(value_1+1)), *((int32_t*)(value_1+1)), value_1+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_2+1)), *((int32_t*)(value_2+1)), value_2+BYTE_ARRAY_HEADER_SIZE);
|
//obidebug(1, "\nCOMPARING 1=%d,%.*s; 2=%d,%.*s", *((int32_t*)(value_1+1)), *((int32_t*)(value_1+1)), value_1+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_2+1)), *((int32_t*)(value_2+1)), value_2+BYTE_ARRAY_HEADER_SIZE);
|
||||||
@ -462,6 +465,15 @@ int array_compare(byte_t* value_1, byte_t* value_2)
|
|||||||
if (len_1 != len_2)
|
if (len_1 != len_2)
|
||||||
return (len_1 - len_2);
|
return (len_1 - len_2);
|
||||||
|
|
||||||
|
if (size_1 != 8)
|
||||||
|
{
|
||||||
|
ini_len_1 = *((int32_t*)(value_1+5));
|
||||||
|
ini_len_2 = *((int32_t*)(value_2+5));
|
||||||
|
|
||||||
|
if (ini_len_1 != ini_len_2)
|
||||||
|
return (ini_len_1 - ini_len_2);
|
||||||
|
}
|
||||||
|
|
||||||
b = BYTE_ARRAY_HEADER_SIZE;
|
b = BYTE_ARRAY_HEADER_SIZE;
|
||||||
comp = 0;
|
comp = 0;
|
||||||
while (!comp && (b < len_1+BYTE_ARRAY_HEADER_SIZE))
|
while (!comp && (b < len_1+BYTE_ARRAY_HEADER_SIZE))
|
||||||
@ -475,7 +487,7 @@ int array_compare(byte_t* value_1, byte_t* value_2)
|
|||||||
|
|
||||||
size_t array_sizeof(byte_t* value)
|
size_t array_sizeof(byte_t* value)
|
||||||
{
|
{
|
||||||
return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)) + 1);
|
return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -995,6 +1007,8 @@ index_t obi_array_add(OBIDMS_array_p array, byte_t* value)
|
|||||||
(array->first)[idx] = data_size_used;
|
(array->first)[idx] = data_size_used;
|
||||||
|
|
||||||
// Store the value itself at the end of the data
|
// Store the value itself at the end of the data
|
||||||
|
// fprintf(stderr, "\nMEMCOPYING TO STORE, with size %ld :", value_size);
|
||||||
|
// printBits(value_size, value);
|
||||||
memcpy((((array->data)->data)+data_size_used), value, value_size);
|
memcpy((((array->data)->data)+data_size_used), value, value_size);
|
||||||
|
|
||||||
// Update the data size
|
// Update the data size
|
||||||
@ -1079,8 +1093,8 @@ byte_t* obi_str_to_obibytes(char* value)
|
|||||||
uint8_t size;
|
uint8_t size;
|
||||||
|
|
||||||
size = 8;
|
size = 8;
|
||||||
length = strlen(value);
|
length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster)
|
||||||
value_b = (byte_t*) malloc(length + BYTE_ARRAY_HEADER_SIZE + 1);
|
value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length);
|
||||||
if (value_b == NULL)
|
if (value_b == NULL)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_ARRAY_ERROR);
|
obi_set_errno(OBI_ARRAY_ERROR);
|
||||||
@ -1090,7 +1104,8 @@ byte_t* obi_str_to_obibytes(char* value)
|
|||||||
|
|
||||||
*(value_b) = size;
|
*(value_b) = size;
|
||||||
|
|
||||||
*((int32_t*)(value_b+1)) = length;
|
*((int32_t*)(value_b+1)) = length; // TODO comment
|
||||||
|
*((int32_t*)(value_b+5)) = length;
|
||||||
|
|
||||||
strcpy(value_b+BYTE_ARRAY_HEADER_SIZE, value);
|
strcpy(value_b+BYTE_ARRAY_HEADER_SIZE, value);
|
||||||
|
|
||||||
@ -1107,3 +1122,73 @@ const char* obi_obibytes_to_str(byte_t* value_b)
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
byte_t* obi_seq_to_obibytes(char* seq)
|
||||||
|
{
|
||||||
|
byte_t* value_b;
|
||||||
|
int32_t length; // length of the value (without the header) in bytes
|
||||||
|
uint8_t size; // size of one element in bits
|
||||||
|
int32_t seq_length;
|
||||||
|
byte_t* encoded_seq;
|
||||||
|
|
||||||
|
// Check if just ATGC and set size of a nucleotide accordingly (2 bits or 4 bits)
|
||||||
|
//fprintf(stderr, "\nonly ATGC = %d", only_ATGC(seq));
|
||||||
|
if (only_ATGC(seq))
|
||||||
|
size = 2;
|
||||||
|
else
|
||||||
|
size = 4;
|
||||||
|
|
||||||
|
// Set length
|
||||||
|
seq_length = strlen(seq);
|
||||||
|
if (size == 2)
|
||||||
|
length = ceil((double) seq_length / (double) 4.0);
|
||||||
|
else // size == 4
|
||||||
|
length = ceil((double) seq_length / (double) 2.0);
|
||||||
|
|
||||||
|
// Encode
|
||||||
|
if (size == 2)
|
||||||
|
encoded_seq = encode_seq_on_2_bits(seq, seq_length);
|
||||||
|
else // size == 4
|
||||||
|
return NULL;
|
||||||
|
// encoded_seq = encode_seq_on_4_bits(seq, seq_length);
|
||||||
|
|
||||||
|
// Set the values in the byte array
|
||||||
|
value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length);
|
||||||
|
|
||||||
|
*(value_b) = size;
|
||||||
|
*((int32_t*)(value_b+1)) = length;
|
||||||
|
*((int32_t*)(value_b+5)) = seq_length;
|
||||||
|
|
||||||
|
//fprintf(stderr, "\nstored seq length : %d\n", *((int32_t*)(value_b+5)));
|
||||||
|
|
||||||
|
memcpy(value_b+BYTE_ARRAY_HEADER_SIZE, encoded_seq, length);
|
||||||
|
|
||||||
|
//obidebug(1, "\n\nENCODED VALUE_B = ");
|
||||||
|
//printBits(((*((int32_t*)(value_b+1)))+BYTE_ARRAY_HEADER_SIZE), value_b);
|
||||||
|
|
||||||
|
free(encoded_seq);
|
||||||
|
|
||||||
|
return value_b;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char* obi_obibytes_to_seq(byte_t* value_b)
{
    uint8_t  nuc_size;      // size of one element in bits, read from byte 0 of the header
    int32_t  seq_length;    // value stored at bytes 5-8 of the header

    nuc_size = *(value_b);

    // Only the 2-bits encoding can be decoded for now
    if (nuc_size != 2)
        return NULL;

    seq_length = *((int32_t*)(value_b+5));

    return decode_seq_on_2_bits(value_b+BYTE_ARRAY_HEADER_SIZE, seq_length);
}
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
*/
|
*/
|
||||||
#define ARRAY_GROWTH_FACTOR (2) /**< The growth factor when an array is enlarged.
|
#define ARRAY_GROWTH_FACTOR (2) /**< The growth factor when an array is enlarged.
|
||||||
*/
|
*/
|
||||||
#define BYTE_ARRAY_HEADER_SIZE (5) /**< The size of the header of a byte array.
|
#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
@ -284,5 +284,34 @@ byte_t* obi_str_to_obibytes(char* value);
|
|||||||
const char* obi_obibytes_to_str(byte_t* value_b);
|
const char* obi_obibytes_to_str(byte_t* value_b);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Converts a DNA sequence to a byte array with a header.
|
||||||
|
*
|
||||||
|
* @warning The byte array must be freed by the caller.
|
||||||
|
*
|
||||||
|
* @param seq The DNA sequence to convert.
|
||||||
|
*
|
||||||
|
* @returns A pointer to the byte array created.
|
||||||
|
* @retval NULL if an error occurred.
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
byte_t* obi_seq_to_obibytes(char* seq);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Converts a byte array to a DNA sequence.
|
||||||
|
*
|
||||||
|
* @param value_b The byte array to convert.
|
||||||
|
*
|
||||||
|
* @returns A pointer to the DNA sequence contained in the byte array.
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
const char* obi_obibytes_to_seq(byte_t* value_b);
|
||||||
|
|
||||||
|
|
||||||
#endif /* OBIARRAY_H_ */
|
#endif /* OBIARRAY_H_ */
|
||||||
|
|
||||||
|
@ -533,12 +533,12 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
obidebug(1, "\nCan't create column because of empty column name");
|
obidebug(1, "\nCan't create column because of empty column name");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if ((data_type < 1) || (data_type > 5))
|
if ((data_type < 1) || (data_type > 6))
|
||||||
{
|
{
|
||||||
obidebug(1, "\nCan't create column because of invalid data type");
|
obidebug(1, "\nCan't create column because of invalid data type");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if ((data_type == 5) && (array_name == NULL))
|
if (((data_type == 5) || (data_type == 6)) && (array_name == NULL))
|
||||||
{
|
{
|
||||||
obidebug(1, "\nCan't create column because of empty array name");
|
obidebug(1, "\nCan't create column because of empty array name");
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -701,8 +701,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
if (comments != NULL)
|
if (comments != NULL)
|
||||||
strncpy(header->comments, comments, COMMENTS_MAX_LENGTH);
|
strncpy(header->comments, comments, COMMENTS_MAX_LENGTH);
|
||||||
|
|
||||||
// If the data type is OBI_IDX, the associated obi_array is opened or created
|
// If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created
|
||||||
if (data_type == 5)
|
if ((data_type == 5) || (data_type == 6))
|
||||||
{
|
{
|
||||||
array = obi_array(dms, array_name);
|
array = obi_array(dms, array_name);
|
||||||
if (array == NULL)
|
if (array == NULL)
|
||||||
@ -838,8 +838,8 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
|||||||
|
|
||||||
column->writable = false;
|
column->writable = false;
|
||||||
|
|
||||||
// If the data type is OBI_IDX, the associated obi_array is opened or created
|
// If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created
|
||||||
if ((column->header)->data_type == 5)
|
if (((column->header)->data_type == 5) || ((column->header)->data_type == 6))
|
||||||
{
|
{
|
||||||
array = obi_array(dms, (column->header)->array_name);
|
array = obi_array(dms, (column->header)->array_name);
|
||||||
if (array == NULL)
|
if (array == NULL)
|
||||||
@ -1175,7 +1175,8 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OBI_IDX: for (i=start;i<end;i++)
|
case OBI_STR:
|
||||||
|
case OBI_SEQ: for (i=start;i<end;i++)
|
||||||
{
|
{
|
||||||
*(((index_t*) (column->data)) + i) = OBIIdx_NA;
|
*(((index_t*) (column->data)) + i) = OBIIdx_NA;
|
||||||
}
|
}
|
||||||
|
120
src/obidmscolumn_seq.c
Normal file
120
src/obidmscolumn_seq.c
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* OBIDMS_column_seq functions *
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file obidmscolumn_seq.c
|
||||||
|
* @author Celine Mercier
|
||||||
|
* @date November 18th 2015
|
||||||
|
* @brief Functions handling OBIColumns containing data in the form of indices referring to DNA sequences.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "obidmscolumn.h"
|
||||||
|
#include "obitypes.h"
|
||||||
|
#include "obierrno.h"
|
||||||
|
#include "obidebug.h"
|
||||||
|
#include "obiarray.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||||
|
|
||||||
|
|
||||||
|
/**********************************************************************
|
||||||
|
*
|
||||||
|
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
|
||||||
|
*
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
/*
 * Sets a DNA sequence in a column whose cells hold indices into an obiarray,
 * addressing the cell by line number and element index.
 *
 * The column is enlarged as many times as needed for line_nb to fit, and the
 * number of lines used is updated. The sequence is encoded into a temporary
 * byte array, added to the obiarray associated with the column, and the index
 * returned by the obiarray is written in the column.
 *
 * Returns 0 on success, -1 if the line number is too large, if the column
 * could not be enlarged, if the sequence could not be encoded, or if it could
 * not be added to the obiarray.
 */
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value)
{
	byte_t* value_b;
	index_t idx;

	// Check that the line number is not greater than the maximum allowed
	if (line_nb >= MAXIMUM_LINE_COUNT)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
		return -1;
	}

	// Check if the file needs to be enlarged
	while ((line_nb+1) > (column->header)->line_count)
	{
		// Enlarge the file
		if (obi_enlarge_column(column) < 0)
			return -1;
	}

	// Update lines used
	if ((line_nb+1) > (column->header)->lines_used)
		(column->header)->lines_used = line_nb+1;

	// Encode the value on a byte array with a header
	value_b = obi_seq_to_obibytes(value);
	if (value_b == NULL)
		return -1;

	// Add in the obiarray
	idx = obi_array_add(column->array, value_b);

	// The encoded buffer is only needed for the call above (it was already
	// freed right after a successful add), so release it before checking the
	// result: this way it is not leaked when the add fails.
	free(value_b);

	if (idx == -1)
		return -1;

	// Add the value's index in the column
	*(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = idx;

	return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Recovers a DNA sequence from a column whose cells hold indices into an
 * obiarray, addressing the cell by line number and element index.
 *
 * Returns "\0" if the line is beyond the lines currently used (obi_errno is
 * set in that case) or if the cell holds the NA index; otherwise returns the
 * result of decoding the byte array stored in the obiarray at that index.
 */
const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
	index_t* cells;
	index_t  stored_idx;

	// Only lines that have already been used can be read
	if ((line_nb+1) > (column->header)->lines_used)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
		return "\0"; // TODO
	}

	// The column data is read as a flat array of obiarray indices
	cells = (index_t*) (column->data);
	stored_idx = *(cells + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);

	// Anything but NA is looked up in the obiarray and decoded
	if (stored_idx != OBIIdx_NA)
		return obi_obibytes_to_seq(obi_array_get(column->array, stored_idx));

	return "\0"; // TODO
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Sets a DNA sequence in a column, addressing the element by its name.
 *
 * The name is resolved to an element index, then the work is delegated to
 * obi_column_set_obiseq_with_elt_idx().
 *
 * Returns 0 on success, -1 if the name could not be resolved or if setting
 * the value failed.
 */
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, char* value)
{
	index_t position = obi_column_get_element_index_from_name(column, element_name);

	// Unknown element name
	if (position == OBIIdx_NA)
		return -1;

	// Any negative status from the index-based setter is reported as -1
	return (obi_column_set_obiseq_with_elt_idx(column, line_nb, position, value) < 0) ? -1 : 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Recovers a DNA sequence from a column, addressing the element by its name.
 *
 * The name is resolved to an element index, then the work is delegated to
 * obi_column_get_obiseq_with_elt_idx().
 *
 * Returns "\0" if the name could not be resolved, otherwise whatever the
 * index-based getter returns.
 */
const char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
	index_t position = obi_column_get_element_index_from_name(column, element_name);

	return (position == OBIIdx_NA) ? "\0" : obi_column_get_obiseq_with_elt_idx(column, line_nb, position);
}
|
||||||
|
|
101
src/obidmscolumn_seq.h
Normal file
101
src/obidmscolumn_seq.h
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
* OBIDMS_column_seq header file *
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file obidmscolumn_seq.h
|
||||||
|
* @author Celine Mercier
|
||||||
|
* @date November 18th 2015
|
||||||
|
* @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to DNA sequences.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef OBIDMSCOLUMN_SEQ_H_
|
||||||
|
#define OBIDMSCOLUMN_SEQ_H_
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "obidmscolumn.h"
|
||||||
|
#include "obitypes.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||||
|
* to DNA sequences in an obiarray, using the index of the element in the line.
|
||||||
|
*
|
||||||
|
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||||
|
*
|
||||||
|
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||||
|
* @param line_nb The number of the line where the value should be set.
|
||||||
|
* @param element_idx The index of the element that should be set in the line.
|
||||||
|
* @param value The value that should be set.
|
||||||
|
*
|
||||||
|
* @returns An integer value indicating the success of the operation.
|
||||||
|
* @retval 0 on success.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||||
|
* to DNA sequences in an obiarray, using the index of the element in the line.
|
||||||
|
*
|
||||||
|
* @param column A pointer as returned by obi_create_column().
|
||||||
|
* @param line_nb The number of the line where the value should be recovered.
|
||||||
|
* @param element_idx The index of the element that should be recovered in the line.
|
||||||
|
*
|
||||||
|
* @returns The recovered value.
|
||||||
|
* @retval "\0" (an empty string) if the value is NA, or if an error occurred (in which case obi_errno is set).
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
|
||||||
|
* to DNA sequences in an obiarray, using the name of the element in the line.
|
||||||
|
*
|
||||||
|
* @warning Pointers returned by obi_open_column() don't allow writing.
|
||||||
|
*
|
||||||
|
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||||
|
* @param line_nb The number of the line where the value should be set.
|
||||||
|
* @param element_name The name of the element that should be set in the line.
|
||||||
|
* @param value The value that should be set.
|
||||||
|
*
|
||||||
|
* @returns An integer value indicating the success of the operation.
|
||||||
|
* @retval 0 on success.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, char* value);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
|
||||||
|
* to DNA sequences in an obiarray, using the name of the element in the line.
|
||||||
|
*
|
||||||
|
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
|
||||||
|
* @param line_nb The number of the line where the value should be recovered.
|
||||||
|
* @param element_name The name of the element that should be recovered in the line.
|
||||||
|
*
|
||||||
|
* @returns The recovered value.
|
||||||
|
* @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
|
||||||
|
*
|
||||||
|
* @since November 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
const char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* OBIDMSCOLUMN_SEQ_H_ */
|
||||||
|
|
@ -97,5 +97,5 @@ int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb,
|
|||||||
const char* obi_column_get_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
|
const char* obi_column_get_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
|
||||||
|
|
||||||
|
|
||||||
#endif /* OBIDMSCOLUMN_IDX_H_ */
|
#endif /* OBIDMSCOLUMN_STR_H_ */
|
||||||
|
|
||||||
|
@ -40,7 +40,10 @@ size_t obi_sizeof(OBIType_t type)
|
|||||||
case OBI_CHAR: size = sizeof(obichar_t);
|
case OBI_CHAR: size = sizeof(obichar_t);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OBI_IDX: size = sizeof(index_t);
|
case OBI_STR: size = sizeof(index_t);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OBI_SEQ: size = sizeof(index_t);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default: size = 0;
|
default: size = 0;
|
||||||
@ -90,7 +93,10 @@ char* name_data_type(int data_type)
|
|||||||
case OBI_CHAR: name = strdup("OBI_CHAR");
|
case OBI_CHAR: name = strdup("OBI_CHAR");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OBI_IDX: name = strdup("OBI_IDX");
|
case OBI_STR: name = strdup("OBI_STR");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OBI_SEQ: name = strdup("OBI_SEQ");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,7 +44,8 @@ typedef enum OBIType {
|
|||||||
OBI_FLOAT, /**< a floating value (C type : double) */
|
OBI_FLOAT, /**< a floating value (C type : double) */
|
||||||
OBI_BOOL, /**< a boolean true/false value, see obibool_t enum */
|
OBI_BOOL, /**< a boolean true/false value, see obibool_t enum */
|
||||||
OBI_CHAR, /**< a character (C type : char) */
|
OBI_CHAR, /**< a character (C type : char) */
|
||||||
OBI_IDX /**< an index in a data structure (C type : int64_t) */
|
OBI_STR, /**< an index in a data structure (C type : int64_t) referring to a character string*/
|
||||||
|
OBI_SEQ /**< an index in a data structure (C type : int64_t) referring to a DNA sequence*/
|
||||||
} OBIType_t, *OBIType_p;
|
} OBIType_t, *OBIType_p;
|
||||||
|
|
||||||
|
|
||||||
@ -52,7 +53,7 @@ typedef int64_t index_t;
|
|||||||
typedef int32_t obiint_t;
|
typedef int32_t obiint_t;
|
||||||
typedef double obifloat_t;
|
typedef double obifloat_t;
|
||||||
typedef char obichar_t;
|
typedef char obichar_t;
|
||||||
|
// TODO same for obistr_t and obiseq_t ?
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Union used to compute the NA value of the OBI_FLOAT OBIType.
|
* @brief Union used to compute the NA value of the OBI_FLOAT OBIType.
|
||||||
|
Reference in New Issue
Block a user