From 3b59043ea8172fd82701fc610ecea02094978d12 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Fri, 20 May 2016 16:45:22 +0200 Subject: [PATCH] Major update: New column type to store sequence qualities. Closes #41 --- python/obitools3/commands/import.pyx | 7 +- python/obitools3/obidms/_obidms.cfiles | 32 +-- python/obitools3/obidms/_obidms.pxd | 1 + python/obitools3/obidms/_obidms.pyx | 15 +- .../obidms/_obidmscolumn_bool.cfiles | 32 +-- .../obidms/_obidmscolumn_char.cfiles | 32 +-- .../obidms/_obidmscolumn_float.cfiles | 32 +-- .../obitools3/obidms/_obidmscolumn_int.cfiles | 32 +-- .../obidms/_obidmscolumn_qual.cfiles | 59 ++++++ .../obitools3/obidms/_obidmscolumn_qual.pxd | 17 ++ .../obitools3/obidms/_obidmscolumn_qual.pyx | 151 ++++++++++++++ .../obitools3/obidms/_obidmscolumn_seq.cfiles | 32 +-- .../obitools3/obidms/_obidmscolumn_str.cfiles | 32 +-- python/obitools3/obidms/_obiseq.cfiles | 32 +-- python/obitools3/obidms/_obiseq.pxd | 8 + python/obitools3/obidms/_obiseq.pyx | 26 ++- python/obitools3/obidms/_obitaxo.cfiles | 32 +-- python/obitools3/obidms/capi/obidmscolumn.pxd | 45 ++++ python/obitools3/obidms/capi/obitypes.pxd | 4 + python/obitools3/obidms/capi/obiview.pxd | 51 +++++ python/obitools3/obigrep.py | 2 +- src/char_str_indexer.c | 17 +- src/char_str_indexer.h | 4 +- src/dna_seq_indexer.c | 1 + src/obiblob.h | 2 + src/obidmscolumn_qual.c | 88 ++++++-- src/obidmscolumn_qual.h | 36 ++-- src/obidmscolumn_str.c | 11 - src/obitypes.h | 2 +- src/obiview.c | 81 +++++++- src/obiview.h | 194 ++++++++++++++++++ src/uint8_indexer.c | 60 ++---- src/uint8_indexer.h | 59 +++--- 33 files changed, 962 insertions(+), 267 deletions(-) create mode 100644 python/obitools3/obidms/_obidmscolumn_qual.cfiles create mode 100644 python/obitools3/obidms/_obidmscolumn_qual.pxd create mode 100644 python/obitools3/obidms/_obidmscolumn_qual.pyx diff --git a/python/obitools3/commands/import.pyx b/python/obitools3/commands/import.pyx index 923a63a..fb41c83 100644 --- 
a/python/obitools3/commands/import.pyx +++ b/python/obitools3/commands/import.pyx @@ -35,7 +35,6 @@ def addOptions(parser): type=str, help="Name of the default DMS for reading and writing data") - group.add_argument('--destination-view','-v', action="store", dest="import:destview", metavar='', @@ -96,12 +95,14 @@ def run(config): inputs = uopen(config['import']['filename']) + get_quality = False if config['import']['seqinformat']=='fasta': iseq = fastaIterator(inputs) view_type="NUC_SEQS_VIEW" elif config['import']['seqinformat']=='fastq': iseq = fastqIterator(inputs) view_type="NUC_SEQS_VIEW" + get_quality = True else: raise RuntimeError('No file format specified') @@ -120,13 +121,15 @@ def run(config): view[i].set_id(seq['id']) view[i].set_definition(seq['definition']) view[i].set_sequence(seq['sequence']) + if get_quality : + view[i].set_quality(seq['quality']) for tag in seq['tags'] : #print(tag, seq['tags'][tag]) #if seq['tags'][tag] not in NA_list : view[i][tag] = seq['tags'][tag] i+=1 - #print(view) + #print(i) print(view.__repr__()) view.save_and_close() diff --git a/python/obitools3/obidms/_obidms.cfiles b/python/obitools3/obidms/_obidms.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obidms.cfiles +++ b/python/obitools3/obidms/_obidms.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c 
../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c ../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c -../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h -../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obidms.pxd b/python/obitools3/obidms/_obidms.pxd index b0e06b4..0b5480b 100644 --- a/python/obitools3/obidms/_obidms.pxd +++ b/python/obitools3/obidms/_obidms.pxd @@ -70,6 +70,7 @@ cdef class OBIView_NUC_SEQS(OBIView): cdef OBIDMS_column ids cdef OBIDMS_column sequences cdef OBIDMS_column definitions + cdef OBIDMS_column qualities cpdef delete_column(self, str column_name) diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx index a034704..c74e7ca 100644 --- a/python/obitools3/obidms/_obidms.pyx +++ b/python/obitools3/obidms/_obidms.pyx @@ -17,6 +17,7 @@ from .capi.obitypes cimport const_char_p, \ OBI_FLOAT, \ OBI_BOOL, \ OBI_CHAR, \ + OBI_QUAL, \ OBI_STR, \ OBI_SEQ, \ name_data_type, \ @@ -43,6 +44,8 @@ from ._obidmscolumn_bool cimport OBIDMS_column_bool, \ from ._obidmscolumn_char cimport OBIDMS_column_char, \ OBIDMS_column_multi_elts_char +from ._obidmscolumn_qual cimport OBIDMS_column_qual + from ._obidmscolumn_str cimport OBIDMS_column_str, \ OBIDMS_column_multi_elts_str @@ -71,7 +74,8 @@ from .capi.obiview cimport Obiview_p, \ VIEW_TYPE_NUC_SEQS, \ NUC_SEQUENCE_COLUMN, \ ID_COLUMN, \ - DEFINITION_COLUMN + DEFINITION_COLUMN, \ + QUALITY_COLUMN from 
libc.stdlib cimport malloc from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer @@ -187,6 +191,11 @@ cdef class OBIDMS_column : subclass = OBIDMS_column_char else : subclass = OBIDMS_column_multi_elts_char + elif col_type == OBI_QUAL : + if col_one_element_per_line : + subclass = OBIDMS_column_qual +# else : # TODO +# subclass = OBIDMS_column_multi_elts_qual elif col_type == OBI_STR : if col_one_element_per_line : subclass = OBIDMS_column_str @@ -355,6 +364,8 @@ cdef class OBIView : data_type = OBI_BOOL elif type == 'OBI_CHAR' : data_type = OBI_CHAR + elif type == 'OBI_QUAL' : + data_type = OBI_QUAL elif type == 'OBI_STR' : data_type = OBI_STR elif type == 'OBI_SEQ' : @@ -489,6 +500,7 @@ cdef class OBIView_NUC_SEQS(OBIView): self.ids = self.columns[bytes2str(ID_COLUMN)] self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)] self.definitions = self.columns[bytes2str(DEFINITION_COLUMN)] + self.qualities = self.columns[bytes2str(QUALITY_COLUMN)] cpdef delete_column(self, str column_name) : @@ -537,6 +549,7 @@ cdef class OBIView_line : def __setitem__(self, str column_name, object value): # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get) + # TODO OBI_QUAL ? 
cdef type value_type cdef str value_obitype if column_name not in self.view : diff --git a/python/obitools3/obidms/_obidmscolumn_bool.cfiles b/python/obitools3/obidms/_obidmscolumn_bool.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obidmscolumn_bool.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_bool.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c ../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c ../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c -../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h -../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obidmscolumn_char.cfiles b/python/obitools3/obidms/_obidmscolumn_char.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obidmscolumn_char.cfiles +++ 
b/python/obitools3/obidms/_obidmscolumn_char.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c ../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c ../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c -../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h -../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obidmscolumn_float.cfiles b/python/obitools3/obidms/_obidmscolumn_float.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obidmscolumn_float.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_float.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h 
+../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c ../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c ../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c -../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h -../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obidmscolumn_int.cfiles b/python/obitools3/obidms/_obidmscolumn_int.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obidmscolumn_int.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_int.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c 
+../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c ../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c ../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c -../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h -../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obidmscolumn_qual.cfiles b/python/obitools3/obidms/_obidmscolumn_qual.cfiles new file mode 100644 index 0000000..bf37301 --- /dev/null +++ b/python/obitools3/obidms/_obidmscolumn_qual.cfiles @@ -0,0 +1,59 @@ +../../../src/bloom.h +../../../src/bloom.c +../../../src/char_str_indexer.h +../../../src/char_str_indexer.c +../../../src/crc64.h +../../../src/crc64.c +../../../src/dna_seq_indexer.h +../../../src/dna_seq_indexer.c +../../../src/encode.h +../../../src/encode.c +../../../src/murmurhash2.h +../../../src/murmurhash2.c +../../../src/obi_align.h +../../../src/obi_align.c +../../../src/obiavl.h +../../../src/obiavl.c +../../../src/obiblob_indexer.h +../../../src/obiblob_indexer.c +../../../src/obiblob.h +../../../src/obiblob.c +../../../src/obidebug.h +../../../src/obidms_taxonomy.h +../../../src/obidms_taxonomy.c +../../../src/obidms.h +../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c 
+../../../src/obidmscolumn_float.h +../../../src/obidmscolumn_idx.h +../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h +../../../src/obidmscolumn.h +../../../src/obidmscolumn.c +../../../src/obidmscolumndir.h +../../../src/obidmscolumndir.c +../../../src/obierrno.h +../../../src/obierrno.c +../../../src/obilittlebigman.h +../../../src/obilittlebigman.c +../../../src/obitypes.h +../../../src/obitypes.c +../../../src/obiview.h +../../../src/obiview.c +../../../src/sse_banded_LCS_alignment.h +../../../src/sse_banded_LCS_alignment.c +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obidmscolumn_qual.pxd b/python/obitools3/obidms/_obidmscolumn_qual.pxd new file mode 100644 index 0000000..6384431 --- /dev/null +++ b/python/obitools3/obidms/_obidmscolumn_qual.pxd @@ -0,0 +1,17 @@ +#cython: language_level=3 + +from .capi.obitypes cimport index_t +from ._obidms cimport OBIDMS_column #, OBIDMS_column_multi_elts + + +cdef class OBIDMS_column_qual(OBIDMS_column): + cpdef object get_line(self, index_t line_nb) + cpdef object get_str_line(self, index_t line_nb) + cpdef set_line(self, index_t line_nb, object value) + cpdef set_str_line(self, index_t line_nb, object value) + + +# cdef class OBIDMS_column_multi_elts_qual(OBIDMS_column_multi_elts): +# cpdef object get_item(self, index_t line_nb, str element_name) +# cpdef object get_line(self, index_t line_nb) +# cpdef set_item(self, index_t line_nb, str element_name, object value) diff --git a/python/obitools3/obidms/_obidmscolumn_qual.pyx b/python/obitools3/obidms/_obidmscolumn_qual.pyx new file mode 100644 index 0000000..a6e4a6b --- /dev/null +++ b/python/obitools3/obidms/_obidmscolumn_qual.pyx 
@@ -0,0 +1,151 @@ +#cython: language_level=3 + +from .capi.obiview cimport obi_column_get_obiqual_char_with_elt_name_in_view, \ + obi_column_get_obiqual_char_with_elt_idx_in_view, \ + obi_column_set_obiqual_char_with_elt_name_in_view, \ + obi_column_set_obiqual_char_with_elt_idx_in_view, \ + obi_column_get_obiqual_int_with_elt_name_in_view, \ + obi_column_get_obiqual_int_with_elt_idx_in_view, \ + obi_column_set_obiqual_int_with_elt_name_in_view, \ + obi_column_set_obiqual_int_with_elt_idx_in_view + +from .capi.obierrno cimport obi_errno +from .capi.obitypes cimport OBIQual_char_NA, OBIQual_int_NA, const_char_p + +from ._obidms cimport OBIView + +from obitools3.utils cimport str2bytes, bytes2str + +from libc.stdlib cimport free +from libc.string cimport strcmp +from libc.stdint cimport uint8_t +from libc.stdlib cimport malloc + + +cdef class OBIDMS_column_qual(OBIDMS_column): + + cpdef object get_line(self, index_t line_nb): + cdef const uint8_t* value + cdef int value_length + cdef object result + cdef int i + value = obi_column_get_obiqual_int_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, &value_length) + if obi_errno > 0 : + raise IndexError(line_nb) + if value == NULL : # TODO + result = None + else : + result = [] + for i in range(value_length) : + result.append(value[i]) + return result + + cpdef object get_str_line(self, index_t line_nb): + cdef char* value + cdef object result + cdef int i + value = obi_column_get_obiqual_char_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0) + if obi_errno > 0 : + raise IndexError(line_nb) + if strcmp(value, OBIQual_char_NA) == 0 : + result = None + else : + result = bytes2str(value) + free(value) + return result + + cpdef set_line(self, index_t line_nb, object value): + cdef uint8_t* value_b + cdef int value_length + if value is None : + value_b = NULL # TODO + value_length = 0 + else : + value_length = len(value) + value_b = malloc(value_length * sizeof(uint8_t)) + for i in 
range(value_length) : + value_b[i] = value[i] + if obi_column_set_obiqual_int_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, value_b, value_length) < 0: + raise Exception("Problem setting a value in a column") + if value is not None : + free(value_b) + + cpdef set_str_line(self, index_t line_nb, object value): + cdef bytes value_b + if value is None : + value_b = OBIQual_char_NA + else : + value_b = str2bytes(value) + if obi_column_set_obiqual_char_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, value_b) < 0: + raise Exception("Problem setting a value in a column") + + +# TODO OR NOT? +# cdef class OBIDMS_column_multi_elts_qual(OBIDMS_column_multi_elts): +# +# +# cpdef object get_item(self, index_t line_nb, str element_name): +# cdef const uint8_t* value +# cdef int value_length +# cdef object result +# cdef int i +# value = obi_column_get_obiqual_int_with_elt_name_in_view(self.view.pointer, (self.pointer)[0], line_nb, str2bytes(element_name), &value_length) +# if obi_errno > 0 : +# raise IndexError(line_nb, element_name) +# if value == NULL: # TODO +# result = None +# else : +# result = [] +# for i in range(value_length) : +# result.append(value[i]) +# return result +# +# # cpdef object get_str_item(self, index_t line_nb, str element_name): +# # pass +# # cdef char* value +# # cdef object result +# # value = obi_column_get_obiseq_with_elt_name_in_view(self.view.pointer, (self.pointer)[0], line_nb, str2bytes(element_name)) +# # if obi_errno > 0 : +# # raise IndexError(line_nb, element_name) +# # if strcmp(value, OBISeq_NA) == 0 : +# # result = None +# # else : +# # result = bytes2str(value) +# # free(value) +# # return result +# +# cpdef object get_line(self, index_t line_nb) : +# pass +# # cdef char* value +# # cdef object value_in_result +# # cdef dict result +# # cdef index_t i +# # cdef bint all_NA +# # result = {} +# # all_NA = True +# # for i in range(self.nb_elements_per_line) : +# # value = 
obi_column_get_obiseq_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, i) +# # if obi_errno > 0 : +# # raise IndexError(line_nb) +# # if strcmp(value, OBISeq_NA) == 0 : +# # value_in_result = None +# # else : +# # value_in_result = bytes2str(value) +# # free(value) +# # result[self.elements_names[i]] = value_in_result +# # if all_NA and (value_in_result is not None) : +# # all_NA = False +# # if all_NA : +# # result = None +# # return result +# +# cpdef set_item(self, index_t line_nb, str element_name, object value): +# pass +# # cdef bytes value_b +# # if value is None : +# # value_b = OBISeq_NA +# # else : +# # value_b = str2bytes(value) +# # if obi_column_set_obiseq_with_elt_name_in_view(self.view.pointer, (self.pointer)[0], line_nb, str2bytes(element_name), value_b) < 0: +# # raise Exception("Problem setting a value in a column") +# # diff --git a/python/obitools3/obidms/_obidmscolumn_seq.cfiles b/python/obitools3/obidms/_obidmscolumn_seq.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obidmscolumn_seq.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_seq.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c ../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c 
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c -../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h -../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obidmscolumn_str.cfiles b/python/obitools3/obidms/_obidmscolumn_str.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obidmscolumn_str.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_str.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c ../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c ../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c -../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h 
-../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/_obiseq.cfiles b/python/obitools3/obidms/_obiseq.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obiseq.cfiles +++ b/python/obitools3/obidms/_obiseq.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c ../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c ../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c -../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h -../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git 
a/python/obitools3/obidms/_obiseq.pxd b/python/obitools3/obidms/_obiseq.pxd index cbede6e..8ba239a 100644 --- a/python/obitools3/obidms/_obiseq.pxd +++ b/python/obitools3/obidms/_obiseq.pxd @@ -16,8 +16,12 @@ cdef class OBI_Seq(dict) : cdef class OBI_Nuc_Seq(OBI_Seq) : + cdef object quality + #cpdef str reverse_complement(self) cpdef set_sequence(self, str sequence) + cpdef set_quality(self, object quality) + cpdef get_quality(self) cdef class OBI_Nuc_Seq_Stored(OBIView_line) : @@ -27,4 +31,8 @@ cdef class OBI_Nuc_Seq_Stored(OBIView_line) : cpdef get_definition(self) cpdef set_sequence(self, str sequence) cpdef get_sequence(self) + cpdef set_quality(self, object quality) + cpdef get_quality(self) + cpdef get_str_quality(self) + # cpdef str reverse_complement(self) diff --git a/python/obitools3/obidms/_obiseq.pyx b/python/obitools3/obidms/_obiseq.pyx index 0f76697..8a09d4c 100644 --- a/python/obitools3/obidms/_obiseq.pyx +++ b/python/obitools3/obidms/_obiseq.pyx @@ -4,7 +4,8 @@ from obitools3.utils cimport bytes2str, str2bytes from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \ ID_COLUMN, \ - DEFINITION_COLUMN + DEFINITION_COLUMN, \ + QUALITY_COLUMN cdef class OBI_Seq(dict) : @@ -27,7 +28,7 @@ cdef class OBI_Seq(dict) : cpdef get_definition(self) : return self.definition - + cpdef get_sequence(self) : return self.sequence @@ -41,12 +42,21 @@ cdef class OBI_Nuc_Seq(OBI_Seq) : self.sequence = sequence self[bytes2str(NUC_SEQUENCE_COLUMN)] = sequence + cpdef set_quality(self, object quality) : + self.quality = quality + self[bytes2str(QUALITY_COLUMN)] = quality + + cpdef get_quality(self) : + return self.quality + # cpdef str reverse_complement(self) : TODO in C ? 
# pass cdef class OBI_Nuc_Seq_Stored(OBIView_line) : + # TODO store the str version of column name macros + cpdef set_id(self, str id) : self[bytes2str(ID_COLUMN)] = id @@ -65,6 +75,18 @@ cdef class OBI_Nuc_Seq_Stored(OBIView_line) : cpdef get_sequence(self) : return self[bytes2str(NUC_SEQUENCE_COLUMN)] + cpdef set_quality(self, object quality) : + if type(quality) == list : + self[bytes2str(QUALITY_COLUMN)] = quality + else : # Quality is in str form + (((self.view).columns)[bytes2str(QUALITY_COLUMN)]).set_str_line(self.index, quality) + + cpdef get_quality(self) : + return self[bytes2str(QUALITY_COLUMN)] + + cpdef get_str_quality(self) : + return ((self.view).columns)[bytes2str(QUALITY_COLUMN)].get_str_line(self.index) + # def __str__(self) : # return self[bytes2str(NUC_SEQUENCE_COLUMN)] # or not diff --git a/python/obitools3/obidms/_obitaxo.cfiles b/python/obitools3/obidms/_obitaxo.cfiles index bbdb163..bf37301 100644 --- a/python/obitools3/obidms/_obitaxo.cfiles +++ b/python/obitools3/obidms/_obitaxo.cfiles @@ -23,8 +23,22 @@ ../../../src/obidms_taxonomy.c ../../../src/obidms.h ../../../src/obidms.c +../../../src/obidmscolumn_bool.c +../../../src/obidmscolumn_bool.h +../../../src/obidmscolumn_char.c +../../../src/obidmscolumn_char.h +../../../src/obidmscolumn_float.c +../../../src/obidmscolumn_float.h ../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.c +../../../src/obidmscolumn_int.c +../../../src/obidmscolumn_int.h +../../../src/obidmscolumn_qual.h +../../../src/obidmscolumn_qual.c +../../../src/obidmscolumn_seq.c +../../../src/obidmscolumn_seq.h +../../../src/obidmscolumn_str.c +../../../src/obidmscolumn_str.h ../../../src/obidmscolumn.h ../../../src/obidmscolumn.c ../../../src/obidmscolumndir.h @@ -37,19 +51,9 @@ ../../../src/obitypes.c ../../../src/obiview.h ../../../src/obiview.c -../../../src/utils.h -../../../src/utils.c ../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.c -../../../src/obidmscolumn_bool.c 
-../../../src/obidmscolumn_bool.h -../../../src/obidmscolumn_char.c -../../../src/obidmscolumn_char.h -../../../src/obidmscolumn_float.c -../../../src/obidmscolumn_float.h -../../../src/obidmscolumn_int.c -../../../src/obidmscolumn_int.h -../../../src/obidmscolumn_seq.c -../../../src/obidmscolumn_seq.h -../../../src/obidmscolumn_str.c -../../../src/obidmscolumn_str.h +../../../src/uint8_indexer.h +../../../src/uint8_indexer.c +../../../src/utils.h +../../../src/utils.c diff --git a/python/obitools3/obidms/capi/obidmscolumn.pxd b/python/obitools3/obidms/capi/obidmscolumn.pxd index cc151e4..84694c8 100644 --- a/python/obitools3/obidms/capi/obidmscolumn.pxd +++ b/python/obitools3/obidms/capi/obidmscolumn.pxd @@ -11,6 +11,8 @@ from ..capi.obitypes cimport const_char_p, \ index_t, \ time_t +from libc.stdint cimport uint8_t + cdef extern from "obidmscolumn.h" nogil: @@ -194,3 +196,46 @@ cdef extern from "obidmscolumn_seq.h" nogil: index_t line_nb, index_t element_idx) + +cdef extern from "obidmscolumn_qual.h" nogil: + + int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, + index_t line_nb, + const_char_p element_name, + const_char_p value) + + int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, + index_t line_nb, + index_t element_idx, + const_char_p value) + + int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, + index_t line_nb, + const_char_p element_name, + const uint8_t* value, + int value_length) + + int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, + index_t line_nb, + index_t element_idx, + const uint8_t* value, + int value_length) + + char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, + index_t line_nb, + const_char_p element_name) + + char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, + index_t line_nb, + index_t element_idx) + + const uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, + index_t line_nb, + const_char_p 
element_name, + int* value_length) + + const uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, + index_t line_nb, + index_t element_idx, + int* value_length) + diff --git a/python/obitools3/obidms/capi/obitypes.pxd b/python/obitools3/obidms/capi/obitypes.pxd index 8e72129..6a83a26 100644 --- a/python/obitools3/obidms/capi/obitypes.pxd +++ b/python/obitools3/obidms/capi/obitypes.pxd @@ -21,6 +21,7 @@ cdef extern from "obitypes.h" nogil: OBI_FLOAT, OBI_BOOL, OBI_CHAR, + OBI_QUAL, OBI_STR, OBI_SEQ, OBI_IDX @@ -46,5 +47,8 @@ cdef extern from "obitypes.h" nogil: extern obibool_t OBIBool_NA extern const_char_p OBISeq_NA extern const_char_p OBIStr_NA + extern const_char_p OBIQual_int_NA + extern const_char_p OBIQual_char_NA const_char_p name_data_type(int data_type) + diff --git a/python/obitools3/obidms/capi/obiview.pxd b/python/obitools3/obidms/capi/obiview.pxd index 79cb4d0..6c48de6 100644 --- a/python/obitools3/obidms/capi/obiview.pxd +++ b/python/obitools3/obidms/capi/obiview.pxd @@ -12,6 +12,8 @@ from .obitypes cimport const_char_p, \ from ..capi.obidms cimport OBIDMS_p from ..capi.obidmscolumn cimport OBIDMS_column_p +from libc.stdint cimport uint8_t + cdef extern from "obiview.h" nogil: @@ -19,6 +21,7 @@ cdef extern from "obiview.h" nogil: extern const_char_p NUC_SEQUENCE_COLUMN extern const_char_p ID_COLUMN extern const_char_p DEFINITION_COLUMN + extern const_char_p QUALITY_COLUMN struct Obiview_t : OBIDMS_p dms @@ -203,6 +206,54 @@ cdef extern from "obiview.h" nogil: index_t line_nb, index_t element_idx) + int obi_column_set_obiqual_char_with_elt_idx_in_view(Obiview_p view, + OBIDMS_column_p column, + index_t line_nb, + index_t element_idx, + const char* value) + + int obi_column_set_obiqual_int_with_elt_idx_in_view(Obiview_p view, + OBIDMS_column_p column, + index_t line_nb, + index_t element_idx, + const uint8_t* value, + int value_length) + + char* obi_column_get_obiqual_char_with_elt_idx_in_view(Obiview_p view, + OBIDMS_column_p 
column, + index_t line_nb, + index_t element_idx) + + const uint8_t* obi_column_get_obiqual_int_with_elt_idx_in_view(Obiview_p view, + OBIDMS_column_p column, + index_t line_nb, + index_t element_idx, + int* value_length) + + int obi_column_set_obiqual_char_with_elt_name_in_view(Obiview_p view, + OBIDMS_column_p column, + index_t line_nb, + const char* element_name, + const char* value) + + int obi_column_set_obiqual_int_with_elt_name_in_view(Obiview_p view, + OBIDMS_column_p column, + index_t line_nb, + const char* element_name, + const uint8_t* value, + int value_length) + + char* obi_column_get_obiqual_char_with_elt_name_in_view(Obiview_p view, + OBIDMS_column_p column, + index_t line_nb, + const char* element_name) + + const uint8_t* obi_column_get_obiqual_int_with_elt_name_in_view(Obiview_p view, + OBIDMS_column_p column, + index_t line_nb, + const char* element_name, + int* value_length) + int obi_column_set_obistr_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, diff --git a/python/obitools3/obigrep.py b/python/obitools3/obigrep.py index 20577fe..b7f9cbf 100644 --- a/python/obitools3/obigrep.py +++ b/python/obitools3/obigrep.py @@ -36,7 +36,7 @@ if __name__ == '__main__': if l['score'] > 350 : line_selec.append(i) i+=1 - + new_v = d.new_view(args.new_view, view_to_clone=v, line_selection=line_selec, view_type="NUC_SEQS_VIEW", comments="obigrep "+args.view+" to "+args.new_view) #args.key+" "+str(args.comparison)+" "+str(args.value)+" "+) print("\n") diff --git a/src/char_str_indexer.c b/src/char_str_indexer.c index d6f528a..871deb0 100644 --- a/src/char_str_indexer.c +++ b/src/char_str_indexer.c @@ -14,6 +14,7 @@ #include #include +#include "char_str_indexer.h" #include "obiblob.h" #include "obiblob_indexer.h" #include "obidebug.h" @@ -25,24 +26,16 @@ Obi_blob_p obi_str_to_blob(const char* value) { - Obi_blob_p value_b; - int32_t length; + int32_t length; // Compute the number of bytes on which the value will be encoded length = 
strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster) - value_b = obi_blob((byte_t*)value, ELEMENT_SIZE_STR, length, length); - if (value_b == NULL) - { - obidebug(1, "\nError encoding a character string in a blob"); - return NULL; - } - - return value_b; + return obi_blob((byte_t*)value, ELEMENT_SIZE_STR, length, length); } -char* obi_blob_to_str(Obi_blob_p value_b) +const char* obi_blob_to_str(Obi_blob_p value_b) { return value_b->value; } @@ -67,7 +60,7 @@ index_t obi_index_char_str(Obi_indexer_p indexer, const char* value) } -char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx) +const char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx) { Obi_blob_p value_b; diff --git a/src/char_str_indexer.h b/src/char_str_indexer.h index 48d30ce..60d75a7 100644 --- a/src/char_str_indexer.h +++ b/src/char_str_indexer.h @@ -35,7 +35,7 @@ * @since October 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -Obi_blob_p obi_str_to_blob(char* value); +Obi_blob_p obi_str_to_blob(const char* value); /** @@ -80,7 +80,7 @@ index_t obi_index_char_str(Obi_indexer_p indexer, const char* value); * @since April 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx); +const char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx); #endif /* CHAR_STR_INDEXER_H_ */ diff --git a/src/dna_seq_indexer.c b/src/dna_seq_indexer.c index b885042..af5191d 100644 --- a/src/dna_seq_indexer.c +++ b/src/dna_seq_indexer.c @@ -14,6 +14,7 @@ #include #include +#include "dna_seq_indexer.h" #include "obiblob.h" #include "obiblob_indexer.h" #include "obidebug.h" diff --git a/src/obiblob.h b/src/obiblob.h index b9b02b9..0ae2592 100644 --- a/src/obiblob.h +++ b/src/obiblob.h @@ -23,6 +23,8 @@ #define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string. 
*/ +#define ELEMENT_SIZE_UINT8 (8) /**< The size of an element from a value of type uint8_t. + */ #define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits. */ #define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits. diff --git a/src/obidmscolumn_qual.c b/src/obidmscolumn_qual.c index 307bfbf..a789f64 100644 --- a/src/obidmscolumn_qual.c +++ b/src/obidmscolumn_qual.c @@ -6,7 +6,7 @@ * @file obidsmcolumn_qual.c * @author Celine Mercier * @date May 4th 2016 - * @brief Functions handling OBIColumns containing data in the form of indices referring to sequence quality arrays. + * @brief Functions handling OBIColumns containing data in the form of indices referring to sequence qualities. */ @@ -14,9 +14,10 @@ #include #include +#include "obidmscolumn_qual.h" #include "obidmscolumn.h" #include "obitypes.h" -#include "obidmscolumn_str.c" +#include "uint8_indexer.h" /********************************************************************** @@ -26,38 +27,83 @@ **********************************************************************/ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value) -{ // TODO discuss - return obi_column_set_obistr_with_elt_idx(column, line_nb, element_idx, value); +{ + uint8_t* int_value; + int int_value_length; + int i; + int ret_value; + + int_value_length = strlen(value); + int_value = (uint8_t*) malloc(int_value_length * sizeof(uint8_t)); + + // Convert in uint8_t array to index in that format + for (i=0; iindexer, value, value_length); + if (idx == -1) // An error occurred + { + if (obi_errno == OBI_READ_ONLY_INDEXER_ERROR) + { + // If the error is that the indexer is read-only, clone it + new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version); + if (new_indexer_name == NULL) + return -1; + column->indexer = 
obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow? + // Add the value in the new indexer + idx = obi_index_uint8(column->indexer, value, value_length); + if (idx == -1) + return -1; + } + else + return -1; + } + // Add the value's index in the column + *(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = idx; return 0; } char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) -{ // TODO discuss - char* value; +{ + char* value; + const uint8_t* int_value; + int int_value_length; + int i; - value = obi_column_get_obistr_with_elt_idx(column, line_nb, element_idx); - if (strcmp(value, OBIStr_NA) == 0) - return OBIQual_char_NA; + int_value = obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, &int_value_length); + + value = (char*) malloc((int_value_length + 1) * sizeof(char)); + + // Encode int quality to char quality + for (i=0; iindexer, idx); + return obi_retrieve_uint8(column->indexer, idx, value_length); } @@ -84,13 +130,13 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li } -int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, uint8_t* value) +int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length) { index_t element_idx = obi_column_get_element_index_from_name(column, element_name); if (element_idx == OBIIdx_NA) return -1; - return obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value); + return obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value, value_length); } @@ -104,12 +150,12 @@ char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t } -uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* 
element_name) // TODO const? (mapped) +const uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length) { index_t element_idx = obi_column_get_element_index_from_name(column, element_name); if (element_idx == OBIIdx_NA) return OBIQual_int_NA; - return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx); + return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, value_length); } diff --git a/src/obidmscolumn_qual.h b/src/obidmscolumn_qual.h index d8eec88..9057983 100644 --- a/src/obidmscolumn_qual.h +++ b/src/obidmscolumn_qual.h @@ -6,7 +6,7 @@ * @file obidsmcolumn_qual.h * @author Celine Mercier * @date May 4th 2016 - * @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to sequence quality arrays. + * @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to sequence qualities. */ @@ -22,6 +22,12 @@ #include "obitypes.h" +#define QUALITY_ASCII_BASE (33) /**< The ASCII base of sequence quality. + * Used to convert sequence qualities from characters to integers + * and the other way around. + */ + + /** * @brief Sets a value in an OBIDMS column containing data in the form of indices referring * to sequence qualities handled by an indexer, and using the index of the element in the column's line. @@ -56,7 +62,8 @@ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t lin * @param column A pointer as returned by obi_create_column() or obi_clone_column(). * @param line_nb The number of the line where the value should be set. * @param element_idx The index of the element that should be set in the line. - * @param value The value that should be set, in the integer format. + * @param value The value that should be set, in the integer array format. + * @param value_length The length of the integer array. 
* * @returns An integer value indicating the success of the operation. * @retval 0 on success. @@ -65,7 +72,7 @@ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t lin * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, uint8_t* value); +int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length); /** @@ -79,7 +86,7 @@ int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line * @param element_idx The index of the element that should be recovered in the line. * * @returns The recovered value, in the character string format. - * @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set. + * @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set. * * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) @@ -96,14 +103,15 @@ char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t l * @param column A pointer as returned by obi_create_column(). * @param line_nb The number of the line where the value should be recovered. * @param element_idx The index of the element that should be recovered in the line. + * @param value_length A pointer on an integer to store the length of the integer array recovered. * - * @returns The recovered value, in the integer format. - * @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set. + * @returns The recovered value, in the integer array format. + * @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set. 
* * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx); +const uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length); /** @@ -133,7 +141,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li * @brief Sets a value in an OBIDMS column containing data in the form of indices referring * to sequence qualities handled by an indexer, and using the index of the element in the column's line. * - * This function is for quality scores in the integer format. + * This function is for quality scores in the integer array format. * * @warning Pointers returned by obi_open_column() don't allow writing. * @@ -141,6 +149,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li * @param line_nb The number of the line where the value should be set. * @param element_name The name of the element that should be set in the line. * @param value The value that should be set, in the integer format. + * @param value_length The length of the integer array. * * @returns An integer value indicating the success of the operation. * @retval 0 on success. @@ -149,7 +158,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, uint8_t* value); +int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length); /** @@ -163,7 +172,7 @@ int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t lin * @param element_name The name of the element that should be recovered in the line. 
* * @returns The recovered value, in the character string format. - * @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set. + * @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set. * * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) @@ -175,19 +184,20 @@ char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring * to sequence qualities handled by an indexer, and using the index of the element in the column's line. * - * This function returns quality scores in the integer format. + * This function returns quality scores in the integer array format. * * @param column A pointer as returned by obi_create_column() or obi_clone_column(). * @param line_nb The number of the line where the value should be recovered. * @param element_name The name of the element that should be recovered in the line. + * @param value_length A pointer on an integer to store the length of the integer array recovered. * * @returns The recovered value, in the integer format. - * @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set. + * @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set. 
* * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -utin8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name); +const uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length); #endif /* OBIDMSCOLUMN_QUAL_H_ */ diff --git a/src/obidmscolumn_str.c b/src/obidmscolumn_str.c index 1b46c86..111ab8c 100644 --- a/src/obidmscolumn_str.c +++ b/src/obidmscolumn_str.c @@ -29,17 +29,6 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t idx; char* new_indexer_name; -// TODO -// size_t i; -// uint8_t q; -// for (i=0;i<=strlen(value);i++) -// { -// if () -// q = ((uint8_t) value[i]) - 33; -// fprintf(stderr, "\n%c == %u", value[i], q); -// } - - if (obi_column_prepare_to_set_value(column, line_nb) < 0) return -1; diff --git a/src/obitypes.h b/src/obitypes.h index 01d46ab..b64dd46 100644 --- a/src/obitypes.h +++ b/src/obitypes.h @@ -25,7 +25,7 @@ #define OBISeq_NA ("\0") /**< NA value for the type OBI_SEQ */ // TODO discuss #define OBIStr_NA ("\0") /**< NA value for the type OBI_STR */ // TODO discuss #define OBIQual_char_NA ("\0") /**< NA value for the type OBI_QUAL if the quality is in character string format */ // TODO test and discuss -#define OBIQual_int_NA ("\0") /**< NA value for the type OBI_QUAL if the quality is in integer format */ // TODO test and discuss +#define OBIQual_int_NA (NULL) /**< NA value for the type OBI_QUAL if the quality is in integer format */ // TODO test and discuss /** diff --git a/src/obiview.c b/src/obiview.c index e06bf37..62ecff7 100644 --- a/src/obiview.c +++ b/src/obiview.c @@ -24,6 +24,7 @@ #include "obidmscolumn_char.h" #include "obidmscolumn_float.h" #include "obidmscolumn_int.h" +#include "obidmscolumn_qual.h" #include "obidmscolumn_seq.h" #include "obidmscolumn_str.h" #include "obierrno.h" @@ -600,8 +601,6 @@ Obiview_p 
obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v if (view_to_clone == NULL) { - // TODO Add quality column? - // Adding sequence column if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", "Nucleotide sequences", true) < 0) { @@ -620,6 +619,12 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; } + // Adding quality column + if (obi_view_add_column(view, QUALITY_COLUMN, -1, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", "Sequence qualities", true) < 0) + { + obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); + return NULL; + } } return view; @@ -1490,6 +1495,78 @@ obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_colu /****************************************/ +/*********** FOR QUAL COLUMNS ***********/ + +int obi_column_set_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value) +{ + if (prepare_to_set_value_in_column(view, &column, &line_nb) < 0) + return -1; + return obi_column_set_obiqual_char_with_elt_idx(column, line_nb, element_idx, value); +} + + +int obi_column_set_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length) +{ + if (prepare_to_set_value_in_column(view, &column, &line_nb) < 0) + return -1; + return obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value, value_length); +} + + +char* obi_column_get_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx) +{ + if (prepare_to_get_value_from_column(view, &line_nb) < 0) + return OBIQual_char_NA; + return obi_column_get_obiqual_char_with_elt_idx(column, line_nb, element_idx); +} + + +const uint8_t* 
obi_column_get_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length) +{ + if (prepare_to_get_value_from_column(view, &line_nb) < 0) + return OBIQual_int_NA; + return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, value_length); +} + + +int obi_column_set_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value) +{ + index_t element_idx = obi_column_get_element_index_from_name(column, element_name); + if (element_idx == OBIIdx_NA) + return -1; + return obi_column_set_obiqual_char_with_elt_idx_in_view(view, column, line_nb, element_idx, value); +} + + +int obi_column_set_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length) +{ + index_t element_idx = obi_column_get_element_index_from_name(column, element_name); + if (element_idx == OBIIdx_NA) + return -1; + return obi_column_set_obiqual_int_with_elt_idx_in_view(view, column, line_nb, element_idx, value, value_length); +} + + +char* obi_column_get_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name) +{ + index_t element_idx = obi_column_get_element_index_from_name(column, element_name); + if (element_idx == OBIIdx_NA) + return OBIQual_char_NA; + return obi_column_get_obiqual_char_with_elt_idx_in_view(view, column, line_nb, element_idx); +} + + +const uint8_t* obi_column_get_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length) +{ + index_t element_idx = obi_column_get_element_index_from_name(column, element_name); + if (element_idx == OBIIdx_NA) + return OBIQual_int_NA; + return obi_column_get_obiqual_int_with_elt_idx_in_view(view, column, line_nb, element_idx, value_length); +} + 
+/****************************************/ + + /*********** FOR SEQ COLUMNS ***********/ int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value) diff --git a/src/obiview.h b/src/obiview.h index 319b8d5..cdaae51 100644 --- a/src/obiview.h +++ b/src/obiview.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,9 @@ #define DEFINITION_COLUMN "DEFINITION" /**< The name of the column containing the sequence definitions * in NUC_SEQS_VIEW views. */ +#define QUALITY_COLUMN "QUALITY" /**< The name of the column containing the sequence qualities + * in NUC_SEQS_VIEW views. + */ /** @@ -215,6 +219,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con * - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored * - ID_COLUMN where sequence identifiers are stored * - DEFINITION_COLUMN where sequence definitions are stored + * - QUALITY_COLUMN where sequence qualities are stored * * @param dms A pointer on the OBIDMS. * @param view_name The unique name of the view. @@ -246,6 +251,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v * - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored * - ID_COLUMN where sequence identifiers are stored * - DEFINITION_COLUMN where sequence definitions are stored + * - QUALITY_COLUMN where sequence qualities are stored * * @param dms A pointer on the OBIDMS. * @param view_name The unique name of the new view. 
@@ -803,6 +809,194 @@ int obi_column_set_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name); +/** + * @brief Sets a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line, + * in the context of a view. + * + * This function is for qualities in the character string format. + * + * @warning Pointers returned by obi_open_column() don't allow writing. + * + * @param view A pointer on the opened view. + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be set. + * @param element_idx The index of the element that should be set in the line. + * @param value The value that should be set, in the character string format. + * + * @returns An integer value indicating the success of the operation. + * @retval 0 on success. + * @retval -1 if an error occurred. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_column_set_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value); + + +/** + * @brief Sets a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line, + * in the context of a view. + * + * This function is for qualities in the integer format. + * + * @warning Pointers returned by obi_open_column() don't allow writing. + * + * @param view A pointer on the opened view. + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be set. 
+ * @param element_idx The index of the element that should be set in the line. + * @param value The value that should be set, in the integer array format. + * @param value_length The length of the integer array. + * + * @returns An integer value indicating the success of the operation. + * @retval 0 on success. + * @retval -1 if an error occurred. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_column_set_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length); + + +/** + * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line, + * in the context of a view. + * + * This function returns quality scores in the character string format. + * + * @param view A pointer on the opened view. + * @param column A pointer as returned by obi_create_column(). + * @param line_nb The number of the line where the value should be recovered. + * @param element_idx The index of the element that should be recovered in the line. + * + * @returns The recovered value, in the character string format. + * @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +char* obi_column_get_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx); + + +/** + * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line, + * in the context of a view. + * + * This function returns quality scores in the integer format. + * + * @param view A pointer on the opened view. 
+ * @param column A pointer as returned by obi_create_column(). + * @param line_nb The number of the line where the value should be recovered. + * @param element_idx The index of the element that should be recovered in the line. + * @param value_length A pointer on an integer to store the length of the integer array recovered. + * + * @returns The recovered value, in the integer array format. + * @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +const uint8_t* obi_column_get_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length); + + +/** + * @brief Sets a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line, + * in the context of a view. + * + * This function is for quality scores in the character string format. + * + * @warning Pointers returned by obi_open_column() don't allow writing. + * + * @param view A pointer on the opened view. + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be set. + * @param element_name The name of the element that should be set in the line. + * @param value The value that should be set, in the character string format. + * + * @returns An integer value indicating the success of the operation. + * @retval 0 on success. + * @retval -1 if an error occurred. 
+ * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_column_set_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value); + + +/** + * @brief Sets a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line, + * in the context of a view. + * + * This function is for quality scores in the integer array format. + * + * @warning Pointers returned by obi_open_column() don't allow writing. + * + * @param view A pointer on the opened view. + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be set. + * @param element_name The name of the element that should be set in the line. + * @param value The value that should be set, in the integer format. + * @param value_length The length of the integer array. + * + * @returns An integer value indicating the success of the operation. + * @retval 0 on success. + * @retval -1 if an error occurred. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_column_set_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length); + + +/** + * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the index of the element in the column's line, + * in the context of a view. + * + * This function returns quality scores in the character string format. + * + * @param view A pointer on the opened view. + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be recovered. 
+ * @param element_name The name of the element that should be recovered in the line. + * + * @returns The recovered value, in the character string format. + * @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +char* obi_column_get_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name); + + +/** + * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring + * to sequence qualities handled by an indexer, and using the name of the element in the column's line, + * in the context of a view. + * + * This function returns quality scores in the integer array format. + * + * @param view A pointer on the opened view. + * @param column A pointer as returned by obi_create_column() or obi_clone_column(). + * @param line_nb The number of the line where the value should be recovered. + * @param element_name The name of the element that should be recovered in the line. + * @param value_length A pointer on an integer to store the length of the integer array recovered. + * + * @returns The recovered value, in the integer array format. + * @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set. + * + * @since May 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +const uint8_t* obi_column_get_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length); + + /** * @brief Sets a value in an OBIDMS column containing data with the type OBI_SEQ, using the index of the element in the line, * in the context of a view. 
diff --git a/src/uint8_indexer.c b/src/uint8_indexer.c index 3e0657d..cc857fe 100644 --- a/src/uint8_indexer.c +++ b/src/uint8_indexer.c @@ -1,19 +1,21 @@ /**************************************************************************** - * Sequence quality scores indexing functions * + * Uint8 indexing functions * ****************************************************************************/ /** - * @file quality_indexer.c + * @file uint8_indexer.c * @author Celine Mercier * @date May 4th 2016 - * @brief Functions handling the indexing and retrieval of sequence quality scores. + * @brief Functions handling the indexing and retrieval of uint8 arrays. */ #include #include +#include #include +#include "uint8_indexer.h" #include "obiblob.h" #include "obiblob_indexer.h" #include "obidebug.h" @@ -23,60 +25,25 @@ #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) -Obi_blob_p obi_uint8_to_blob(const char* quality) +Obi_blob_p obi_uint8_to_blob(const uint8_t* value, int value_length) { - Obi_blob_p value_b; - int32_t length_encoded_seq; // length of the encoded sequence in bytes - int32_t seq_length; - byte_t* encoded_seq; - - seq_length = strlen(seq); - - // Check if just ATGC and encode accordingly - if (only_ATGC(seq)) - { - // Compute the length (in bytes) of the encoded sequence - length_encoded_seq = ceil((double) seq_length / (double) 4.0); - // Encode - encoded_seq = encode_seq_on_2_bits(seq, seq_length); - if (encoded_seq == NULL) - return NULL; - value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length); - } - else - { - // Compute the length (in bytes) of the encoded sequence - length_encoded_seq = ceil((double) seq_length / (double) 2.0); - // Encode - encoded_seq = encode_seq_on_4_bits(seq, seq_length); - if (encoded_seq == NULL) - return NULL; - value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length); - } - - free(encoded_seq); - - return value_b; + return 
obi_blob((byte_t*)value, ELEMENT_SIZE_UINT8, value_length, value_length); } -char* obi_blob_to_quality_char(Obi_blob_p value_b) +const uint8_t* obi_blob_to_uint8(Obi_blob_p value_b) { - // Decode - if (value_b->element_size == 2) - return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value); - else - return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value); + return ((uint8_t*) (value_b->value)); } -index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value) +index_t obi_index_uint8(Obi_indexer_p indexer, const uint8_t* value, int value_length) { Obi_blob_p value_b; index_t idx; // Encode value - value_b = obi_seq_to_blob(value); + value_b = obi_uint8_to_blob(value, value_length); if (value_b == NULL) return -1; @@ -89,7 +56,7 @@ index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value) } -char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx) +const uint8_t* obi_retrieve_uint8(Obi_indexer_p indexer, index_t idx, int* value_length) { Obi_blob_p value_b; @@ -97,6 +64,7 @@ char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx) value_b = obi_indexer_get(indexer, idx); // Return decoded sequence - return obi_blob_to_seq(value_b); + *value_length = value_b->length_decoded_value; + return obi_blob_to_uint8(value_b); } diff --git a/src/uint8_indexer.h b/src/uint8_indexer.h index 750ff5c..b4d6a8f 100644 --- a/src/uint8_indexer.h +++ b/src/uint8_indexer.h @@ -1,17 +1,17 @@ /**************************************************************************** - * DNA sequence indexer header file * + * uint8 indexer header file * ****************************************************************************/ /** - * @file dna_seq_indexer.h + * @file uint8_indexer.h * @author Celine Mercier - * @date April 12th 2016 - * @brief Header file for the functions handling the indexing of DNA sequences. + * @date May 4th 2016 + * @brief Header file for the functions handling the indexing of uint8 arrays. 
*/ -#ifndef DNA_SEQ_INDEXER_H_ -#define DNA_SEQ_INDEXER_H_ +#ifndef UINT8_INDEXER_H_ +#define UINT8_INDEXER_H_ #include @@ -24,64 +24,69 @@ /** - * @brief Converts a DNA sequence to a blob. + * @brief Converts an uint8 array to a blob. * * @warning The blob must be freed by the caller. * - * @param value The DNA sequence to convert. + * @param value The uint8 array to convert. + * @param value_length The length of the uint8 array to convert. * - * @returns A pointer to the blob created. + * @returns A pointer on the blob created. * @retval NULL if an error occurred. * - * @since November 2015 + * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -Obi_blob_p obi_seq_to_blob(const char* seq); +Obi_blob_p obi_uint8_to_blob(const uint8_t* value, int value_length); /** - * @brief Converts a blob to a DNA sequence. + * @brief Converts a blob to an uint8 array. + * + * @warning The array returned is mapped. * * @param value_b The blob to convert. * - * @returns A pointer to the DNA sequence contained in the blob. + * @returns A pointer on the uint8 array contained in the blob. * @retval NULL if an error occurred. * - * @since November 2015 + * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -char* obi_blob_to_seq(Obi_blob_p value_b); +const uint8_t* obi_blob_to_uint8(Obi_blob_p value_b); /** - * @brief Stores a DNA sequence in an indexer and returns the index. + * @brief Stores an uint8 array in an indexer and returns the index. * * @param indexer The indexer structure. - * @param value The DNA sequence to index. + * @param value The uint8 array to index. + * @param value_length The length of the uint8 array to index. * - * @returns The index referring to the stored DNA sequence in the indexer. + * @returns The index referring to the stored uint8 array in the indexer. 
* - * @since April 2016 + * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value); +index_t obi_index_uint8(Obi_indexer_p indexer, const uint8_t* value, int value_length); /** - * @brief Retrieves a DNA sequence from an indexer. + * @brief Retrieves an uint8 array from an indexer. * - * @warning The DNA sequence returned must be freed by the caller. + * @warning The array returned is mapped. * * @param indexer The indexer structure. - * @param idx The index referring to the DNA sequence to retrieve in the indexer. + * @param idx The index referring to the uint8 array to retrieve in the indexer. + * @param value_length A pointer on an integer to store the length of the array retrieved. * - * @returns A pointer on the DNA sequence. + * @returns A pointer on the uint8 array. * - * @since April 2016 + * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx); +const uint8_t* obi_retrieve_uint8(Obi_indexer_p indexer, index_t idx, int* value_length); -#endif /* DNA_SEQ_INDEXER_H_ */ +#endif /* UINT8_INDEXER_H_ */