Major update: New column type to store sequence qualities. Closes #41

This commit is contained in:
Celine Mercier
2016-05-20 16:45:22 +02:00
parent ffff91e76c
commit 3b59043ea8
33 changed files with 962 additions and 267 deletions

View File

@ -35,7 +35,6 @@ def addOptions(parser):
type=str, type=str,
help="Name of the default DMS for reading and writing data") help="Name of the default DMS for reading and writing data")
group.add_argument('--destination-view','-v', group.add_argument('--destination-view','-v',
action="store", dest="import:destview", action="store", dest="import:destview",
metavar='<VIEW NAME>', metavar='<VIEW NAME>',
@ -96,12 +95,14 @@ def run(config):
inputs = uopen(config['import']['filename']) inputs = uopen(config['import']['filename'])
get_quality = False
if config['import']['seqinformat']=='fasta': if config['import']['seqinformat']=='fasta':
iseq = fastaIterator(inputs) iseq = fastaIterator(inputs)
view_type="NUC_SEQS_VIEW" view_type="NUC_SEQS_VIEW"
elif config['import']['seqinformat']=='fastq': elif config['import']['seqinformat']=='fastq':
iseq = fastqIterator(inputs) iseq = fastqIterator(inputs)
view_type="NUC_SEQS_VIEW" view_type="NUC_SEQS_VIEW"
get_quality = True
else: else:
raise RuntimeError('No file format specified') raise RuntimeError('No file format specified')
@ -120,13 +121,15 @@ def run(config):
view[i].set_id(seq['id']) view[i].set_id(seq['id'])
view[i].set_definition(seq['definition']) view[i].set_definition(seq['definition'])
view[i].set_sequence(seq['sequence']) view[i].set_sequence(seq['sequence'])
if get_quality :
view[i].set_quality(seq['quality'])
for tag in seq['tags'] : for tag in seq['tags'] :
#print(tag, seq['tags'][tag]) #print(tag, seq['tags'][tag])
#if seq['tags'][tag] not in NA_list : #if seq['tags'][tag] not in NA_list :
view[i][tag] = seq['tags'][tag] view[i][tag] = seq['tags'][tag]
i+=1 i+=1
#print(view) #print(i)
print(view.__repr__()) print(view.__repr__())
view.save_and_close() view.save_and_close()

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -70,6 +70,7 @@ cdef class OBIView_NUC_SEQS(OBIView):
cdef OBIDMS_column ids cdef OBIDMS_column ids
cdef OBIDMS_column sequences cdef OBIDMS_column sequences
cdef OBIDMS_column definitions cdef OBIDMS_column definitions
cdef OBIDMS_column qualities
cpdef delete_column(self, str column_name) cpdef delete_column(self, str column_name)

View File

@ -17,6 +17,7 @@ from .capi.obitypes cimport const_char_p, \
OBI_FLOAT, \ OBI_FLOAT, \
OBI_BOOL, \ OBI_BOOL, \
OBI_CHAR, \ OBI_CHAR, \
OBI_QUAL, \
OBI_STR, \ OBI_STR, \
OBI_SEQ, \ OBI_SEQ, \
name_data_type, \ name_data_type, \
@ -43,6 +44,8 @@ from ._obidmscolumn_bool cimport OBIDMS_column_bool, \
from ._obidmscolumn_char cimport OBIDMS_column_char, \ from ._obidmscolumn_char cimport OBIDMS_column_char, \
OBIDMS_column_multi_elts_char OBIDMS_column_multi_elts_char
from ._obidmscolumn_qual cimport OBIDMS_column_qual
from ._obidmscolumn_str cimport OBIDMS_column_str, \ from ._obidmscolumn_str cimport OBIDMS_column_str, \
OBIDMS_column_multi_elts_str OBIDMS_column_multi_elts_str
@ -71,7 +74,8 @@ from .capi.obiview cimport Obiview_p, \
VIEW_TYPE_NUC_SEQS, \ VIEW_TYPE_NUC_SEQS, \
NUC_SEQUENCE_COLUMN, \ NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \ ID_COLUMN, \
DEFINITION_COLUMN DEFINITION_COLUMN, \
QUALITY_COLUMN
from libc.stdlib cimport malloc from libc.stdlib cimport malloc
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
@ -187,6 +191,11 @@ cdef class OBIDMS_column :
subclass = OBIDMS_column_char subclass = OBIDMS_column_char
else : else :
subclass = OBIDMS_column_multi_elts_char subclass = OBIDMS_column_multi_elts_char
elif col_type == OBI_QUAL :
if col_one_element_per_line :
subclass = OBIDMS_column_qual
# else : # TODO
# subclass = OBIDMS_column_multi_elts_qual
elif col_type == OBI_STR : elif col_type == OBI_STR :
if col_one_element_per_line : if col_one_element_per_line :
subclass = OBIDMS_column_str subclass = OBIDMS_column_str
@ -355,6 +364,8 @@ cdef class OBIView :
data_type = OBI_BOOL data_type = OBI_BOOL
elif type == 'OBI_CHAR' : elif type == 'OBI_CHAR' :
data_type = OBI_CHAR data_type = OBI_CHAR
elif type == 'OBI_QUAL' :
data_type = OBI_QUAL
elif type == 'OBI_STR' : elif type == 'OBI_STR' :
data_type = OBI_STR data_type = OBI_STR
elif type == 'OBI_SEQ' : elif type == 'OBI_SEQ' :
@ -489,6 +500,7 @@ cdef class OBIView_NUC_SEQS(OBIView):
self.ids = self.columns[bytes2str(ID_COLUMN)] self.ids = self.columns[bytes2str(ID_COLUMN)]
self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)] self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)]
self.definitions = self.columns[bytes2str(DEFINITION_COLUMN)] self.definitions = self.columns[bytes2str(DEFINITION_COLUMN)]
self.qualities = self.columns[bytes2str(QUALITY_COLUMN)]
cpdef delete_column(self, str column_name) : cpdef delete_column(self, str column_name) :
@ -537,6 +549,7 @@ cdef class OBIView_line :
def __setitem__(self, str column_name, object value): def __setitem__(self, str column_name, object value):
# TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get) # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get)
# TODO OBI_QUAL ?
cdef type value_type cdef type value_type
cdef str value_obitype cdef str value_obitype
if column_name not in self.view : if column_name not in self.view :

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -0,0 +1,59 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
../../../src/obi_align.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c
../../../src/uint8_indexer.h
../../../src/uint8_indexer.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,17 @@
#cython: language_level=3
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column #, OBIDMS_column_multi_elts
# Declaration of the column class used for OBI_QUAL columns that hold one
# element per line (sequence qualities).
cdef class OBIDMS_column_qual(OBIDMS_column):
# Read the quality stored at line `line_nb` as a list of ints (None when the
# C layer returns no value).
cpdef object get_line(self, index_t line_nb)
# Read the quality stored at line `line_nb` in its character-string form
# (None when the stored value is the NA string).
cpdef object get_str_line(self, index_t line_nb)
# Store a quality given as a sequence of ints (or None) at line `line_nb`.
cpdef set_line(self, index_t line_nb, object value)
# Store a quality given in its character-string form (or None) at line `line_nb`.
cpdef set_str_line(self, index_t line_nb, object value)
# cdef class OBIDMS_column_multi_elts_qual(OBIDMS_column_multi_elts):
# cpdef object get_item(self, index_t line_nb, str element_name)
# cpdef object get_line(self, index_t line_nb)
# cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -0,0 +1,151 @@
#cython: language_level=3
from .capi.obiview cimport obi_column_get_obiqual_char_with_elt_name_in_view, \
obi_column_get_obiqual_char_with_elt_idx_in_view, \
obi_column_set_obiqual_char_with_elt_name_in_view, \
obi_column_set_obiqual_char_with_elt_idx_in_view, \
obi_column_get_obiqual_int_with_elt_name_in_view, \
obi_column_get_obiqual_int_with_elt_idx_in_view, \
obi_column_set_obiqual_int_with_elt_name_in_view, \
obi_column_set_obiqual_int_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIQual_char_NA, OBIQual_int_NA, const_char_p
from ._obidms cimport OBIView
from obitools3.utils cimport str2bytes, bytes2str
from libc.stdlib cimport free
from libc.string cimport strcmp
from libc.stdint cimport uint8_t
from libc.stdlib cimport malloc
cdef class OBIDMS_column_qual(OBIDMS_column):
    """Column storing one sequence quality per line.

    A quality can be read or written either as a list of integer scores
    (`get_line` / `set_line`) or in its character-string form
    (`get_str_line` / `set_str_line`). All accesses go through the view
    owning the column and always address element index 0 of the line.
    """

    cpdef object get_line(self, index_t line_nb):
        """Return the quality stored at `line_nb` as a list of ints.

        Returns None when the C layer hands back a NULL array.
        Raises IndexError(line_nb) when `obi_errno` is set after the call.
        """
        cdef const uint8_t* value
        cdef int value_length
        cdef object result
        cdef int i
        value = obi_column_get_obiqual_int_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, &value_length)
        if obi_errno > 0 :
            raise IndexError(line_nb)
        if value == NULL :  # TODO
            result = None
        else :
            # Copy the C array into a Python list; the array itself is not
            # freed here (it is returned as `const`).
            result = []
            for i in range(value_length) :
                result.append(<int>value[i])
        return result

    cpdef object get_str_line(self, index_t line_nb):
        """Return the quality stored at `line_nb` in character-string form.

        Returns None when the stored value equals the NA quality string.
        Raises IndexError(line_nb) when `obi_errno` is set after the call.
        """
        cdef char* value
        cdef object result
        value = obi_column_get_obiqual_char_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0)
        if obi_errno > 0 :
            raise IndexError(line_nb)
        if strcmp(value, OBIQual_char_NA) == 0 :
            # NOTE(review): the buffer is deliberately not freed on this path,
            # as in the original code -- confirm that the C layer returns the
            # static NA string here rather than a fresh allocation.
            result = None
        else :
            result = bytes2str(value)
            # The non-NA string is released once converted to a Python str.
            free(value)
        return result

    cpdef set_line(self, index_t line_nb, object value):
        """Store `value` (a sequence of int scores, or None) at `line_nb`.

        None is passed to the C layer as a NULL array of length 0.
        Raises MemoryError when the temporary buffer cannot be allocated and
        Exception when the C layer reports a failure. The temporary buffer
        is released on every path, including error paths.
        """
        cdef uint8_t* value_b = NULL  # TODO
        cdef int value_length = 0
        cdef int i
        if value is not None :
            value_length = len(value)
            value_b = <uint8_t*> malloc(value_length * sizeof(uint8_t))
            # malloc(0) may legitimately return NULL, so only a failed
            # non-empty allocation is treated as an error.
            if value_b == NULL and value_length > 0 :
                raise MemoryError("Could not allocate the buffer for a quality value")
        try :
            for i in range(value_length) :
                value_b[i] = <uint8_t>value[i]
            if obi_column_set_obiqual_int_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, value_b, value_length) < 0:
                raise Exception("Problem setting a value in a column")
        finally :
            # free(NULL) is a no-op, so this also covers the None case.
            free(value_b)

    cpdef set_str_line(self, index_t line_nb, object value):
        """Store `value` (a quality in character-string form, or None) at `line_nb`.

        None is stored as the NA quality string.
        Raises Exception when the C layer reports a failure.
        """
        cdef bytes value_b
        if value is None :
            value_b = OBIQual_char_NA
        else :
            value_b = str2bytes(value)
        if obi_column_set_obiqual_char_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, value_b) < 0:
            raise Exception("Problem setting a value in a column")
# TODO OR NOT?
# cdef class OBIDMS_column_multi_elts_qual(OBIDMS_column_multi_elts):
#
#
# cpdef object get_item(self, index_t line_nb, str element_name):
# cdef const uint8_t* value
# cdef int value_length
# cdef object result
# cdef int i
# value = obi_column_get_obiqual_int_with_elt_name_in_view(self.view.pointer, (self.pointer)[0], line_nb, str2bytes(element_name), &value_length)
# if obi_errno > 0 :
# raise IndexError(line_nb, element_name)
# if value == NULL: # TODO
# result = None
# else :
# result = []
# for i in range(value_length) :
# result.append(<int>value[i])
# return result
#
# # cpdef object get_str_item(self, index_t line_nb, str element_name):
# # pass
# # cdef char* value
# # cdef object result
# # value = obi_column_get_obiseq_with_elt_name_in_view(self.view.pointer, (self.pointer)[0], line_nb, str2bytes(element_name))
# # if obi_errno > 0 :
# # raise IndexError(line_nb, element_name)
# # if strcmp(value, OBISeq_NA) == 0 :
# # result = None
# # else :
# # result = bytes2str(value)
# # free(value)
# # return result
#
# cpdef object get_line(self, index_t line_nb) :
# pass
# # cdef char* value
# # cdef object value_in_result
# # cdef dict result
# # cdef index_t i
# # cdef bint all_NA
# # result = {}
# # all_NA = True
# # for i in range(self.nb_elements_per_line) :
# # value = obi_column_get_obiseq_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, i)
# # if obi_errno > 0 :
# # raise IndexError(line_nb)
# # if strcmp(value, OBISeq_NA) == 0 :
# # value_in_result = None
# # else :
# # value_in_result = bytes2str(value)
# # free(value)
# # result[self.elements_names[i]] = value_in_result
# # if all_NA and (value_in_result is not None) :
# # all_NA = False
# # if all_NA :
# # result = None
# # return result
#
# cpdef set_item(self, index_t line_nb, str element_name, object value):
# pass
# # cdef bytes value_b
# # if value is None :
# # value_b = OBISeq_NA
# # else :
# # value_b = str2bytes(value)
# # if obi_column_set_obiseq_with_elt_name_in_view(self.view.pointer, (self.pointer)[0], line_nb, str2bytes(element_name), value_b) < 0:
# # raise Exception("Problem setting a value in a column")
# #

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -16,8 +16,12 @@ cdef class OBI_Seq(dict) :
cdef class OBI_Nuc_Seq(OBI_Seq) : cdef class OBI_Nuc_Seq(OBI_Seq) :
cdef object quality
#cpdef str reverse_complement(self) #cpdef str reverse_complement(self)
cpdef set_sequence(self, str sequence) cpdef set_sequence(self, str sequence)
cpdef set_quality(self, object quality)
cpdef get_quality(self)
cdef class OBI_Nuc_Seq_Stored(OBIView_line) : cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
@ -27,4 +31,8 @@ cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
cpdef get_definition(self) cpdef get_definition(self)
cpdef set_sequence(self, str sequence) cpdef set_sequence(self, str sequence)
cpdef get_sequence(self) cpdef get_sequence(self)
cpdef set_quality(self, object quality)
cpdef get_quality(self)
cpdef get_str_quality(self)
# cpdef str reverse_complement(self) # cpdef str reverse_complement(self)

View File

@ -4,7 +4,8 @@ from obitools3.utils cimport bytes2str, str2bytes
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \ from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \ ID_COLUMN, \
DEFINITION_COLUMN DEFINITION_COLUMN, \
QUALITY_COLUMN
cdef class OBI_Seq(dict) : cdef class OBI_Seq(dict) :
@ -27,7 +28,7 @@ cdef class OBI_Seq(dict) :
cpdef get_definition(self) : cpdef get_definition(self) :
return self.definition return self.definition
cpdef get_sequence(self) : cpdef get_sequence(self) :
return self.sequence return self.sequence
@ -41,12 +42,21 @@ cdef class OBI_Nuc_Seq(OBI_Seq) :
self.sequence = sequence self.sequence = sequence
self[bytes2str(NUC_SEQUENCE_COLUMN)] = sequence self[bytes2str(NUC_SEQUENCE_COLUMN)] = sequence
cpdef set_quality(self, object quality) :
    """Record `quality` both as the `quality` attribute and under the quality column key."""
    self.quality = quality
    column_name = bytes2str(QUALITY_COLUMN)
    self[column_name] = quality
cpdef get_quality(self) :
    """Return the value currently held in the `quality` attribute."""
    current_quality = self.quality
    return current_quality
# cpdef str reverse_complement(self) : TODO in C ? # cpdef str reverse_complement(self) : TODO in C ?
# pass # pass
cdef class OBI_Nuc_Seq_Stored(OBIView_line) : cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
# TODO store the str version of column name macros
cpdef set_id(self, str id) : cpdef set_id(self, str id) :
self[bytes2str(ID_COLUMN)] = id self[bytes2str(ID_COLUMN)] = id
@ -65,6 +75,18 @@ cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
cpdef get_sequence(self) : cpdef get_sequence(self) :
return self[bytes2str(NUC_SEQUENCE_COLUMN)] return self[bytes2str(NUC_SEQUENCE_COLUMN)]
cpdef set_quality(self, object quality) :
    """Store `quality` in the quality column of this line.

    A list (or list subclass) is written through item assignment on the
    line; any other value is treated as the character-string form and is
    written with the column's `set_str_line`.
    """
    # isinstance() rather than an exact type comparison, so that list
    # subclasses are not misrouted to the string setter.
    if isinstance(quality, list) :
        self[bytes2str(QUALITY_COLUMN)] = quality
    else :  # Quality is in str form
        (((self.view).columns)[bytes2str(QUALITY_COLUMN)]).set_str_line(self.index, quality)
cpdef get_quality(self) :
    """Return this line's value for the quality column, via item access."""
    column_name = bytes2str(QUALITY_COLUMN)
    return self[column_name]
cpdef get_str_quality(self) :
    """Return this line's quality in character-string form, read with the column's `get_str_line`."""
    view_columns = (self.view).columns
    quality_column = view_columns[bytes2str(QUALITY_COLUMN)]
    return quality_column.get_str_line(self.index)
# def __str__(self) : # def __str__(self) :
# return self[bytes2str(NUC_SEQUENCE_COLUMN)] # or not # return self[bytes2str(NUC_SEQUENCE_COLUMN)] # or not

View File

@ -23,8 +23,22 @@
../../../src/obidms_taxonomy.c ../../../src/obidms_taxonomy.c
../../../src/obidms.h ../../../src/obidms.h
../../../src/obidms.c ../../../src/obidms.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h ../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c ../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h ../../../src/obidmscolumn.h
../../../src/obidmscolumn.c ../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h ../../../src/obidmscolumndir.h
@ -37,19 +51,9 @@
../../../src/obitypes.c ../../../src/obitypes.c
../../../src/obiview.h ../../../src/obiview.h
../../../src/obiview.c ../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/sse_banded_LCS_alignment.h ../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c ../../../src/sse_banded_LCS_alignment.c
../../../src/obidmscolumn_bool.c ../../../src/uint8_indexer.h
../../../src/obidmscolumn_bool.h ../../../src/uint8_indexer.c
../../../src/obidmscolumn_char.c ../../../src/utils.h
../../../src/obidmscolumn_char.h ../../../src/utils.c
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -11,6 +11,8 @@ from ..capi.obitypes cimport const_char_p, \
index_t, \ index_t, \
time_t time_t
from libc.stdint cimport uint8_t
cdef extern from "obidmscolumn.h" nogil: cdef extern from "obidmscolumn.h" nogil:
@ -194,3 +196,46 @@ cdef extern from "obidmscolumn_seq.h" nogil:
index_t line_nb, index_t line_nb,
index_t element_idx) index_t element_idx)
# C prototypes of the quality-column accessors declared in obidmscolumn_qual.h.
# Each accessor exists in two addressing flavours (element name or element
# index) and two value flavours: "char" exchanges the quality as a C string,
# "int" exchanges it as a uint8_t array with an explicit length.
# NOTE(review): the meaning of the int return codes of the setters is not
# visible here -- presumably negative on failure; confirm against the C header.
cdef extern from "obidmscolumn_qual.h" nogil:
# Set a quality from a C string, addressing the element by name.
int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
const_char_p value)
# Set a quality from a C string, addressing the element by index.
int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const_char_p value)
# Set a quality from a uint8_t array of `value_length` items, addressing the
# element by name.
int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
const uint8_t* value,
int value_length)
# Set a quality from a uint8_t array of `value_length` items, addressing the
# element by index.
int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const uint8_t* value,
int value_length)
# Get a quality as a C string, addressing the element by name.
# NOTE(review): ownership of the returned char* is not visible here -- confirm
# whether the caller must free it.
char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
# Get a quality as a C string, addressing the element by index.
char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
# Get a quality as a const uint8_t array, addressing the element by name.
# `value_length` is presumably an out-parameter receiving the array length
# -- TODO confirm.
const uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
int* value_length)
# Get a quality as a const uint8_t array, addressing the element by index.
# `value_length` is presumably an out-parameter receiving the array length
# -- TODO confirm.
const uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
int* value_length)

View File

@ -21,6 +21,7 @@ cdef extern from "obitypes.h" nogil:
OBI_FLOAT, OBI_FLOAT,
OBI_BOOL, OBI_BOOL,
OBI_CHAR, OBI_CHAR,
OBI_QUAL,
OBI_STR, OBI_STR,
OBI_SEQ, OBI_SEQ,
OBI_IDX OBI_IDX
@ -46,5 +47,8 @@ cdef extern from "obitypes.h" nogil:
extern obibool_t OBIBool_NA extern obibool_t OBIBool_NA
extern const_char_p OBISeq_NA extern const_char_p OBISeq_NA
extern const_char_p OBIStr_NA extern const_char_p OBIStr_NA
extern const_char_p OBIQual_int_NA
extern const_char_p OBIQual_char_NA
const_char_p name_data_type(int data_type) const_char_p name_data_type(int data_type)

View File

@ -12,6 +12,8 @@ from .obitypes cimport const_char_p, \
from ..capi.obidms cimport OBIDMS_p from ..capi.obidms cimport OBIDMS_p
from ..capi.obidmscolumn cimport OBIDMS_column_p from ..capi.obidmscolumn cimport OBIDMS_column_p
from libc.stdint cimport uint8_t
cdef extern from "obiview.h" nogil: cdef extern from "obiview.h" nogil:
@ -19,6 +21,7 @@ cdef extern from "obiview.h" nogil:
extern const_char_p NUC_SEQUENCE_COLUMN extern const_char_p NUC_SEQUENCE_COLUMN
extern const_char_p ID_COLUMN extern const_char_p ID_COLUMN
extern const_char_p DEFINITION_COLUMN extern const_char_p DEFINITION_COLUMN
extern const_char_p QUALITY_COLUMN
struct Obiview_t : struct Obiview_t :
OBIDMS_p dms OBIDMS_p dms
@ -203,6 +206,54 @@ cdef extern from "obiview.h" nogil:
index_t line_nb, index_t line_nb,
index_t element_idx) index_t element_idx)
int obi_column_set_obiqual_char_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const char* value)
int obi_column_set_obiqual_int_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const uint8_t* value,
int value_length)
char* obi_column_get_obiqual_char_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
const uint8_t* obi_column_get_obiqual_int_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
int* value_length)
int obi_column_set_obiqual_char_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const char* element_name,
const char* value)
int obi_column_set_obiqual_int_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const char* element_name,
const uint8_t* value,
int value_length)
char* obi_column_get_obiqual_char_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const char* element_name)
const uint8_t* obi_column_get_obiqual_int_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const char* element_name,
int* value_length)
int obi_column_set_obistr_with_elt_name_in_view(Obiview_p view, int obi_column_set_obistr_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column, OBIDMS_column_p column,
index_t line_nb, index_t line_nb,

View File

@ -36,7 +36,7 @@ if __name__ == '__main__':
if l['score'] > 350 : if l['score'] > 350 :
line_selec.append(i) line_selec.append(i)
i+=1 i+=1
new_v = d.new_view(args.new_view, view_to_clone=v, line_selection=line_selec, view_type="NUC_SEQS_VIEW", comments="obigrep "+args.view+" to "+args.new_view) #args.key+" "+str(args.comparison)+" "+str(args.value)+" "+) new_v = d.new_view(args.new_view, view_to_clone=v, line_selection=line_selec, view_type="NUC_SEQS_VIEW", comments="obigrep "+args.view+" to "+args.new_view) #args.key+" "+str(args.comparison)+" "+str(args.value)+" "+)
print("\n") print("\n")

View File

@ -14,6 +14,7 @@
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include "char_str_indexer.h"
#include "obiblob.h" #include "obiblob.h"
#include "obiblob_indexer.h" #include "obiblob_indexer.h"
#include "obidebug.h" #include "obidebug.h"
@ -25,24 +26,16 @@
Obi_blob_p obi_str_to_blob(const char* value) Obi_blob_p obi_str_to_blob(const char* value)
{ {
Obi_blob_p value_b; int32_t length;
int32_t length;
// Compute the number of bytes on which the value will be encoded // Compute the number of bytes on which the value will be encoded
length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster) length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster)
value_b = obi_blob((byte_t*)value, ELEMENT_SIZE_STR, length, length); return obi_blob((byte_t*)value, ELEMENT_SIZE_STR, length, length);
if (value_b == NULL)
{
obidebug(1, "\nError encoding a character string in a blob");
return NULL;
}
return value_b;
} }
char* obi_blob_to_str(Obi_blob_p value_b) const char* obi_blob_to_str(Obi_blob_p value_b)
{ {
return value_b->value; return value_b->value;
} }
@ -67,7 +60,7 @@ index_t obi_index_char_str(Obi_indexer_p indexer, const char* value)
} }
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx) const char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx)
{ {
Obi_blob_p value_b; Obi_blob_p value_b;

View File

@ -35,7 +35,7 @@
* @since October 2015 * @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
Obi_blob_p obi_str_to_blob(char* value); Obi_blob_p obi_str_to_blob(const char* value);
/** /**
@ -80,7 +80,7 @@ index_t obi_index_char_str(Obi_indexer_p indexer, const char* value);
* @since April 2016 * @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx); const char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx);
#endif /* CHAR_STR_INDEXER_H_ */ #endif /* CHAR_STR_INDEXER_H_ */

View File

@ -14,6 +14,7 @@
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include "dna_seq_indexer.h"
#include "obiblob.h" #include "obiblob.h"
#include "obiblob_indexer.h" #include "obiblob_indexer.h"
#include "obidebug.h" #include "obidebug.h"

View File

@ -23,6 +23,8 @@
#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string. #define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string.
*/ */
#define ELEMENT_SIZE_UINT8 (8) /**< The size of an element from a value of type uint8_t.
*/
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits. #define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits.
*/ */
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits. #define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits.

View File

@ -6,7 +6,7 @@
* @file obidsmcolumn_qual.c * @file obidsmcolumn_qual.c
* @author Celine Mercier * @author Celine Mercier
* @date May 4th 2016 * @date May 4th 2016
* @brief Functions handling OBIColumns containing data in the form of indices referring to sequence quality arrays. * @brief Functions handling OBIColumns containing data in the form of indices referring to sequence qualities.
*/ */
@ -14,9 +14,10 @@
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include "obidmscolumn_qual.h"
#include "obidmscolumn.h" #include "obidmscolumn.h"
#include "obitypes.h" #include "obitypes.h"
#include "obidmscolumn_str.c" #include "uint8_indexer.h"
/********************************************************************** /**********************************************************************
@ -26,38 +27,83 @@
**********************************************************************/ **********************************************************************/
int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value) int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value)
{ // TODO discuss {
return obi_column_set_obistr_with_elt_idx(column, line_nb, element_idx, value); uint8_t* int_value;
int int_value_length;
int i;
int ret_value;
int_value_length = strlen(value);
int_value = (uint8_t*) malloc(int_value_length * sizeof(uint8_t));
// Convert in uint8_t array to index in that format
for (i=0; i<int_value_length; i++)
int_value[i] = ((uint8_t)(value[i])) - QUALITY_ASCII_BASE;
ret_value = obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, int_value, int_value_length);
free(int_value);
return ret_value;
} }
int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value) int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length)
{ {
char* value_char; index_t idx;
char* new_indexer_name;
// Transform the int array into a char array if (obi_column_prepare_to_set_value(column, line_nb) < 0)
// Length?? return -1;
//value_char = ;
obi_column_set_obiqual_char_with_elt_idx(column, line_nb, element_idx, value_char) // Add the value in the indexer
idx = obi_index_uint8(column->indexer, value, value_length);
if (idx == -1) // An error occurred
{
if (obi_errno == OBI_READ_ONLY_INDEXER_ERROR)
{
// If the error is that the indexer is read-only, clone it
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
if (new_indexer_name == NULL)
return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
// Add the value in the new indexer
idx = obi_index_uint8(column->indexer, value, value_length);
if (idx == -1)
return -1;
}
else
return -1;
}
// Add the value's index in the column
*(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = idx;
return 0; return 0;
} }
char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{ // TODO discuss {
char* value; char* value;
const uint8_t* int_value;
int int_value_length;
int i;
value = obi_column_get_obistr_with_elt_idx(column, line_nb, element_idx); int_value = obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, &int_value_length);
if (strcmp(value, OBIStr_NA) == 0)
return OBIQual_char_NA; value = (char*) malloc((int_value_length + 1) * sizeof(char));
// Encode int quality to char quality
for (i=0; i<int_value_length; i++)
value[i] = (char)(int_value[i] + QUALITY_ASCII_BASE);
value[i] = '\0';
return value; return value;
} }
uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) // TODO const? (mapped) const uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length)
{ {
index_t idx; index_t idx;
@ -70,7 +116,7 @@ uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t
if (idx == OBIIdx_NA) if (idx == OBIIdx_NA)
return OBIQual_int_NA; return OBIQual_int_NA;
return obi_retrieve_quality_int(column->indexer, idx); return obi_retrieve_uint8(column->indexer, idx, value_length);
} }
@ -84,13 +130,13 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li
} }
int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, uint8_t* value) int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length)
{ {
index_t element_idx = obi_column_get_element_index_from_name(column, element_name); index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA) if (element_idx == OBIIdx_NA)
return -1; return -1;
return obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value); return obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value, value_length);
} }
@ -104,12 +150,12 @@ char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t
} }
uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name) // TODO const? (mapped) const uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length)
{ {
index_t element_idx = obi_column_get_element_index_from_name(column, element_name); index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA) if (element_idx == OBIIdx_NA)
return OBIQual_int_NA; return OBIQual_int_NA;
return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx); return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, value_length);
} }

View File

@ -6,7 +6,7 @@
* @file obidsmcolumn_qual.h * @file obidsmcolumn_qual.h
* @author Celine Mercier * @author Celine Mercier
* @date May 4th 2016 * @date May 4th 2016
* @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to sequence quality arrays. * @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to sequence qualities.
*/ */
@ -22,6 +22,12 @@
#include "obitypes.h" #include "obitypes.h"
#define QUALITY_ASCII_BASE (33) /**< The ASCII base of sequence quality.
* Used to convert sequence qualities from characters to integers
* and the other way around.
*/
/** /**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring * @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the index of the element in the column's line. * to sequence qualities handled by an indexer, and using the index of the element in the column's line.
@ -56,7 +62,8 @@ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t lin
* @param column A pointer as returned by obi_create_column() or obi_clone_column(). * @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set. * @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line. * @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set, in the integer format. * @param value The value that should be set, in the integer array format.
* @param value_length The length of the integer array.
* *
* @returns An integer value indicating the success of the operation. * @returns An integer value indicating the success of the operation.
* @retval 0 on success. * @retval 0 on success.
@ -65,7 +72,7 @@ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t lin
* @since May 2016 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, uint8_t* value); int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length);
/** /**
@ -79,7 +86,7 @@ int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line
* @param element_idx The index of the element that should be recovered in the line. * @param element_idx The index of the element that should be recovered in the line.
* *
* @returns The recovered value, in the character string format. * @returns The recovered value, in the character string format.
* @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set. * @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
* *
* @since May 2016 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -96,14 +103,15 @@ char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t l
* @param column A pointer as returned by obi_create_column(). * @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered. * @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line. * @param element_idx The index of the element that should be recovered in the line.
* @param value_length A pointer on an integer to store the length of the integer array recovered.
* *
* @returns The recovered value, in the integer format. * @returns The recovered value, in the integer array format.
* @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set. * @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set.
* *
* @since May 2016 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx); const uint8_t* obi_column_get_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length);
/** /**
@ -133,7 +141,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring * @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the index of the element in the column's line. * to sequence qualities handled by an indexer, and using the index of the element in the column's line.
* *
* This function is for quality scores in the integer format. * This function is for quality scores in the integer array format.
* *
* @warning Pointers returned by obi_open_column() don't allow writing. * @warning Pointers returned by obi_open_column() don't allow writing.
* *
@ -141,6 +149,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li
* @param line_nb The number of the line where the value should be set. * @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line. * @param element_name The name of the element that should be set in the line.
* @param value The value that should be set, in the integer format. * @param value The value that should be set, in the integer format.
* @param value_length The length of the integer array.
* *
* @returns An integer value indicating the success of the operation. * @returns An integer value indicating the success of the operation.
* @retval 0 on success. * @retval 0 on success.
@ -149,7 +158,7 @@ int obi_column_set_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t li
* @since May 2016 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, uint8_t* value); int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length);
/** /**
@ -163,7 +172,7 @@ int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t lin
* @param element_name The name of the element that should be recovered in the line. * @param element_name The name of the element that should be recovered in the line.
* *
* @returns The recovered value, in the character string format. * @returns The recovered value, in the character string format.
* @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set. * @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
* *
* @since May 2016 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -175,19 +184,20 @@ char* obi_column_get_obiqual_char_with_elt_name(OBIDMS_column_p column, index_t
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the index of the element in the column's line. * to sequence qualities handled by an indexer, and using the index of the element in the column's line.
* *
* This function returns quality scores in the integer format. * This function returns quality scores in the integer array format.
* *
* @param column A pointer as returned by obi_create_column() or obi_clone_column(). * @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered. * @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line. * @param element_name The name of the element that should be recovered in the line.
* @param value_length A pointer on an integer to store the length of the integer array recovered.
* *
* @returns The recovered value, in the integer format. * @returns The recovered value, in the integer format.
* @retval OBIQual_NA the NA value of the type if an error occurred and obi_errno is set. * @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set.
* *
* @since May 2016 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
utin8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name); const uint8_t* obi_column_get_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length);
#endif /* OBIDMSCOLUMN_QUAL_H_ */ #endif /* OBIDMSCOLUMN_QUAL_H_ */

View File

@ -29,17 +29,6 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
index_t idx; index_t idx;
char* new_indexer_name; char* new_indexer_name;
// TODO
// size_t i;
// uint8_t q;
// for (i=0;i<=strlen(value);i++)
// {
// if ()
// q = ((uint8_t) value[i]) - 33;
// fprintf(stderr, "\n%c == %u", value[i], q);
// }
if (obi_column_prepare_to_set_value(column, line_nb) < 0) if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1; return -1;

View File

@ -25,7 +25,7 @@
#define OBISeq_NA ("\0") /**< NA value for the type OBI_SEQ */ // TODO discuss #define OBISeq_NA ("\0") /**< NA value for the type OBI_SEQ */ // TODO discuss
#define OBIStr_NA ("\0") /**< NA value for the type OBI_STR */ // TODO discuss #define OBIStr_NA ("\0") /**< NA value for the type OBI_STR */ // TODO discuss
#define OBIQual_char_NA ("\0") /**< NA value for the type OBI_QUAL if the quality is in character string format */ // TODO test and discuss #define OBIQual_char_NA ("\0") /**< NA value for the type OBI_QUAL if the quality is in character string format */ // TODO test and discuss
#define OBIQual_int_NA ("\0") /**< NA value for the type OBI_QUAL if the quality is in integer format */ // TODO test and discuss #define OBIQual_int_NA (NULL) /**< NA value for the type OBI_QUAL if the quality is in integer format */ // TODO test and discuss
/** /**

View File

@ -24,6 +24,7 @@
#include "obidmscolumn_char.h" #include "obidmscolumn_char.h"
#include "obidmscolumn_float.h" #include "obidmscolumn_float.h"
#include "obidmscolumn_int.h" #include "obidmscolumn_int.h"
#include "obidmscolumn_qual.h"
#include "obidmscolumn_seq.h" #include "obidmscolumn_seq.h"
#include "obidmscolumn_str.h" #include "obidmscolumn_str.h"
#include "obierrno.h" #include "obierrno.h"
@ -600,8 +601,6 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
if (view_to_clone == NULL) if (view_to_clone == NULL)
{ {
// TODO Add quality column?
// Adding sequence column // Adding sequence column
if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", "Nucleotide sequences", true) < 0) if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", "Nucleotide sequences", true) < 0)
{ {
@ -620,6 +619,12 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL; return NULL;
} }
// Adding quality column
if (obi_view_add_column(view, QUALITY_COLUMN, -1, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", "Sequence qualities", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
} }
return view; return view;
@ -1490,6 +1495,78 @@ obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_colu
/****************************************/ /****************************************/
/*********** FOR QUAL COLUMNS ***********/
int obi_column_set_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value)
{
if (prepare_to_set_value_in_column(view, &column, &line_nb) < 0)
return -1;
return obi_column_set_obiqual_char_with_elt_idx(column, line_nb, element_idx, value);
}
int obi_column_set_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length)
{
if (prepare_to_set_value_in_column(view, &column, &line_nb) < 0)
return -1;
return obi_column_set_obiqual_int_with_elt_idx(column, line_nb, element_idx, value, value_length);
}
char* obi_column_get_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if (prepare_to_get_value_from_column(view, &line_nb) < 0)
return OBIQual_char_NA;
return obi_column_get_obiqual_char_with_elt_idx(column, line_nb, element_idx);
}
const uint8_t* obi_column_get_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length)
{
if (prepare_to_get_value_from_column(view, &line_nb) < 0)
return OBIQual_int_NA;
return obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, value_length);
}
int obi_column_set_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value)
{
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
return obi_column_set_obiqual_char_with_elt_idx_in_view(view, column, line_nb, element_idx, value);
}
int obi_column_set_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length)
{
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
return obi_column_set_obiqual_int_with_elt_idx_in_view(view, column, line_nb, element_idx, value, value_length);
}
char* obi_column_get_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIQual_char_NA;
return obi_column_get_obiqual_char_with_elt_idx_in_view(view, column, line_nb, element_idx);
}
const uint8_t* obi_column_get_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length)
{
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIQual_int_NA;
return obi_column_get_obiqual_int_with_elt_idx_in_view(view, column, line_nb, element_idx, value_length);
}
/****************************************/
/*********** FOR SEQ COLUMNS ***********/ /*********** FOR SEQ COLUMNS ***********/
int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value) int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value)

View File

@ -17,6 +17,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <errno.h> #include <errno.h>
#include <stdio.h> #include <stdio.h>
#include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#include <time.h> #include <time.h>
#include <math.h> #include <math.h>
@ -50,6 +51,9 @@
#define DEFINITION_COLUMN "DEFINITION" /**< The name of the column containing the sequence definitions #define DEFINITION_COLUMN "DEFINITION" /**< The name of the column containing the sequence definitions
* in NUC_SEQS_VIEW views. * in NUC_SEQS_VIEW views.
*/ */
#define QUALITY_COLUMN "QUALITY" /**< The name of the column containing the sequence qualities
* in NUC_SEQS_VIEW views.
*/
/** /**
@ -215,6 +219,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored * - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
* - ID_COLUMN where sequence identifiers are stored * - ID_COLUMN where sequence identifiers are stored
* - DEFINITION_COLUMN where sequence definitions are stored * - DEFINITION_COLUMN where sequence definitions are stored
* - QUALITY_COLUMN where sequence qualities are stored
* *
* @param dms A pointer on the OBIDMS. * @param dms A pointer on the OBIDMS.
* @param view_name The unique name of the view. * @param view_name The unique name of the view.
@ -246,6 +251,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored * - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
* - ID_COLUMN where sequence identifiers are stored * - ID_COLUMN where sequence identifiers are stored
* - DEFINITION_COLUMN where sequence definitions are stored * - DEFINITION_COLUMN where sequence definitions are stored
* - QUALITY_COLUMN where sequence qualities are stored
* *
* @param dms A pointer on the OBIDMS. * @param dms A pointer on the OBIDMS.
* @param view_name The unique name of the new view. * @param view_name The unique name of the new view.
@ -803,6 +809,194 @@ int obi_column_set_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p
obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name); obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the index of the element in the column's line,
* in the context of a view.
*
* This function is for qualities in the character string format.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param view A pointer on the opened view.
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set, in the character string format.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value);
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the index of the element in the column's line,
* in the context of a view.
*
* This function is for qualities in the integer array format.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param view A pointer on the opened view.
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set, in the integer array format.
* @param value_length The length of the integer array.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const uint8_t* value, int value_length);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the index of the element in the column's line,
* in the context of a view.
*
* This function returns quality scores in the character string format.
*
* @param view A pointer on the opened view.
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value, in the character string format.
* @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_column_get_obiqual_char_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the index of the element in the column's line,
* in the context of a view.
*
* This function returns quality scores in the integer array format.
*
* @param view A pointer on the opened view.
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
* @param value_length A pointer on an integer to store the length of the integer array recovered.
*
* @returns The recovered value, in the integer array format.
* @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const uint8_t* obi_column_get_obiqual_int_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, int* value_length);
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the name of the element in the column's line,
* in the context of a view.
*
* This function is for quality scores in the character string format.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param view A pointer on the opened view.
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set, in the character string format.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value);
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the name of the element in the column's line,
* in the context of a view.
*
* This function is for quality scores in the integer array format.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param view A pointer on the opened view.
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set, in the integer array format.
* @param value_length The length of the integer array.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const uint8_t* value, int value_length);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the name of the element in the column's line,
* in the context of a view.
*
* This function returns quality scores in the character string format.
*
* @param view A pointer on the opened view.
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value, in the character string format.
* @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_column_get_obiqual_char_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to sequence qualities handled by an indexer, and using the name of the element in the column's line,
* in the context of a view.
*
* This function returns quality scores in the integer array format.
*
* @param view A pointer on the opened view.
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
* @param value_length A pointer on an integer to store the length of the integer array recovered.
*
* @returns The recovered value, in the integer array format.
* @retval OBIQual_int_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const uint8_t* obi_column_get_obiqual_int_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, int* value_length);
/** /**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_SEQ, using the index of the element in the line, * @brief Sets a value in an OBIDMS column containing data with the type OBI_SEQ, using the index of the element in the line,
* in the context of a view. * in the context of a view.

View File

@ -1,19 +1,21 @@
/**************************************************************************** /****************************************************************************
* Sequence quality scores indexing functions * * Uint8 indexing functions *
****************************************************************************/ ****************************************************************************/
/** /**
* @file quality_indexer.c * @file uint8_indexer.c
* @author Celine Mercier * @author Celine Mercier
* @date May 4th 2016 * @date May 4th 2016
* @brief Functions handling the indexing and retrieval of sequence quality scores. * @brief Functions handling the indexing and retrieval of uint8 arrays.
*/ */
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <stdint.h>
#include <math.h> #include <math.h>
#include "uint8_indexer.h"
#include "obiblob.h" #include "obiblob.h"
#include "obiblob_indexer.h" #include "obiblob_indexer.h"
#include "obidebug.h" #include "obidebug.h"
@ -23,60 +25,25 @@
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
Obi_blob_p obi_uint8_to_blob(const char* quality) Obi_blob_p obi_uint8_to_blob(const uint8_t* value, int value_length)
{ {
Obi_blob_p value_b; return obi_blob((byte_t*)value, ELEMENT_SIZE_UINT8, value_length, value_length);
int32_t length_encoded_seq; // length of the encoded sequence in bytes
int32_t seq_length;
byte_t* encoded_seq;
seq_length = strlen(seq);
// Check if just ATGC and encode accordingly
if (only_ATGC(seq))
{
// Compute the length (in bytes) of the encoded sequence
length_encoded_seq = ceil((double) seq_length / (double) 4.0);
// Encode
encoded_seq = encode_seq_on_2_bits(seq, seq_length);
if (encoded_seq == NULL)
return NULL;
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length);
}
else
{
// Compute the length (in bytes) of the encoded sequence
length_encoded_seq = ceil((double) seq_length / (double) 2.0);
// Encode
encoded_seq = encode_seq_on_4_bits(seq, seq_length);
if (encoded_seq == NULL)
return NULL;
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length);
}
free(encoded_seq);
return value_b;
} }
char* obi_blob_to_quality_char(Obi_blob_p value_b) const uint8_t* obi_blob_to_uint8(Obi_blob_p value_b)
{ {
// Decode return ((uint8_t*) (value_b->value));
if (value_b->element_size == 2)
return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
else
return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);
} }
index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value) index_t obi_index_uint8(Obi_indexer_p indexer, const uint8_t* value, int value_length)
{ {
Obi_blob_p value_b; Obi_blob_p value_b;
index_t idx; index_t idx;
// Encode value // Encode value
value_b = obi_seq_to_blob(value); value_b = obi_uint8_to_blob(value, value_length);
if (value_b == NULL) if (value_b == NULL)
return -1; return -1;
@ -89,7 +56,7 @@ index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value)
} }
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx) const uint8_t* obi_retrieve_uint8(Obi_indexer_p indexer, index_t idx, int* value_length)
{ {
Obi_blob_p value_b; Obi_blob_p value_b;
@ -97,6 +64,7 @@ char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
value_b = obi_indexer_get(indexer, idx); value_b = obi_indexer_get(indexer, idx);
// Return decoded sequence // Return decoded sequence
return obi_blob_to_seq(value_b); *value_length = value_b->length_decoded_value;
return obi_blob_to_uint8(value_b);
} }

View File

@ -1,17 +1,17 @@
/**************************************************************************** /****************************************************************************
* DNA sequence indexer header file * * uint8 indexer header file *
****************************************************************************/ ****************************************************************************/
/** /**
* @file dna_seq_indexer.h * @file uint8_indexer.h
* @author Celine Mercier * @author Celine Mercier
* @date April 12th 2016 * @date May 4th 2016
* @brief Header file for the functions handling the indexing of DNA sequences. * @brief Header file for the functions handling the indexing of uint8 arrays.
*/ */
#ifndef DNA_SEQ_INDEXER_H_ #ifndef UINT8_INDEXER_H_
#define DNA_SEQ_INDEXER_H_ #define UINT8_INDEXER_H_
#include <stdlib.h> #include <stdlib.h>
@ -24,64 +24,69 @@
/** /**
* @brief Converts a DNA sequence to a blob. * @brief Converts an uint8 array to a blob.
* *
* @warning The blob must be freed by the caller. * @warning The blob must be freed by the caller.
* *
* @param value The DNA sequence to convert. * @param value The uint8 array to convert.
* @param value_length The length of the uint8 array to convert.
* *
* @returns A pointer to the blob created. * @returns A pointer on the blob created.
* @retval NULL if an error occurred. * @retval NULL if an error occurred.
* *
* @since November 2015 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
Obi_blob_p obi_seq_to_blob(const char* seq); Obi_blob_p obi_uint8_to_blob(const uint8_t* value, int value_length);
/** /**
* @brief Converts a blob to a DNA sequence. * @brief Converts a blob to an uint8 array.
*
* @warning The array returned is mapped.
* *
* @param value_b The blob to convert. * @param value_b The blob to convert.
* *
* @returns A pointer to the DNA sequence contained in the blob. * @returns A pointer on the uint8 array contained in the blob.
* @retval NULL if an error occurred. * @retval NULL if an error occurred.
* *
* @since November 2015 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
char* obi_blob_to_seq(Obi_blob_p value_b); const uint8_t* obi_blob_to_uint8(Obi_blob_p value_b);
/** /**
* @brief Stores a DNA sequence in an indexer and returns the index. * @brief Stores an uint8 array in an indexer and returns the index.
* *
* @param indexer The indexer structure. * @param indexer The indexer structure.
* @param value The DNA sequence to index. * @param value The uint8 array to index.
* @param value_length The length of the uint8 array to index.
* *
* @returns The index referring to the stored DNA sequence in the indexer. * @returns The index referring to the stored uint8 array in the indexer.
* *
* @since April 2016 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value); index_t obi_index_uint8(Obi_indexer_p indexer, const uint8_t* value, int value_length);
/** /**
* @brief Retrieves a DNA sequence from an indexer. * @brief Retrieves an uint8 array from an indexer.
* *
* @warning The DNA sequence returned must be freed by the caller. * @warning The array returned is mapped.
* *
* @param indexer The indexer structure. * @param indexer The indexer structure.
* @param idx The index referring to the DNA sequence to retrieve in the indexer. * @param idx The index referring to the uint8 array to retrieve in the indexer.
* @param value_length A pointer on an integer to store the length of the array retrieved.
* *
* @returns A pointer on the DNA sequence. * @returns A pointer on the uint8 array.
* *
* @since April 2016 * @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx); const uint8_t* obi_retrieve_uint8(Obi_indexer_p indexer, index_t idx, int* value_length);
#endif /* DNA_SEQ_INDEXER_H_ */ #endif /* UINT8_INDEXER_H_ */