Merged master; fixed conflict.

This commit is contained in:
Celine Mercier
2017-02-14 10:58:43 +01:00
33 changed files with 5202 additions and 1256 deletions

View File

@ -0,0 +1,65 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
../../../src/obi_align.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_blob.c
../../../src/obidmscolumn_blob.h
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_qual.h
../../../src/obidmscolumn_qual.c
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/sse_banded_LCS_alignment.h
../../../src/sse_banded_LCS_alignment.c
../../../src/uint8_indexer.h
../../../src/uint8_indexer.c
../../../src/upperband.h
../../../src/upperband.c
../../../src/utils.h
../../../src/utils.c

View File

@ -4,7 +4,8 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.obidms._obidms cimport OBIDMS # TODO cimport doesn't work
from obitools3.utils cimport str2bytes
from obitools3.obidms.capi.obialign cimport obi_lcs_align_one_column
from obitools3.obidms.capi.obialign cimport obi_lcs_align_one_column, \
obi_lcs_align_two_columns
import time
@ -146,6 +147,13 @@ def addOptions(parser):
default=False,
help="Sequence counts are written in the output view. Default: they are not written.")
group.add_argument('--thread-count','-p', # TODO should probably be in a specific option group
action="store", dest="align:threadcount",
metavar='<THREAD COUNT>',
default=1,
type=int,
help="Number of threads to use for the computation. Default: one.")
cpdef align(str dms_n,
str input_view_1_n, str output_view_n,
@ -156,24 +164,42 @@ cpdef align(str dms_n,
double threshold=0.0, bint normalize=True,
int reference=0, bint similarity_mode=True,
bint print_seq=False, bint print_count=False,
comments="") :
comments="",
int thread_count=1) :
cdef OBIDMS d
d = OBIDMS(dms_n)
# Align 1 column (2 columns not implemented yet)
if obi_lcs_align_one_column(d._pointer, \
str2bytes(input_view_1_n), \
str2bytes(input_column_1_n), \
str2bytes(input_elt_1_n), \
str2bytes(id_column_1_n), \
str2bytes(output_view_n), \
str2bytes(comments), \
print_seq, \
print_count, \
threshold, normalize, reference, similarity_mode) < 0 :
raise Exception("Error aligning sequences")
if input_view_2_n == "" and input_column_2_n == "" :
if obi_lcs_align_one_column(d._pointer, \
str2bytes(input_view_1_n), \
str2bytes(input_column_1_n), \
str2bytes(input_elt_1_n), \
str2bytes(id_column_1_n), \
str2bytes(output_view_n), \
str2bytes(comments), \
print_seq, \
print_count, \
threshold, normalize, reference, similarity_mode,
thread_count) < 0 :
raise Exception("Error aligning sequences")
else :
if obi_lcs_align_two_columns(d._pointer, \
str2bytes(input_view_1_n), \
str2bytes(input_view_2_n), \
str2bytes(input_column_1_n), \
str2bytes(input_column_2_n), \
str2bytes(input_elt_1_n), \
str2bytes(input_elt_2_n), \
str2bytes(id_column_1_n), \
str2bytes(id_column_2_n), \
str2bytes(output_view_n), \
str2bytes(comments), \
print_seq, \
print_count, \
threshold, normalize, reference, similarity_mode) < 0 :
raise Exception("Error aligning sequences")
d.close()
@ -199,8 +225,9 @@ def run(config):
similarity_mode = config['align']['similarity'], \
print_seq = config['align']['printseq'], \
print_count = config['align']['printcount'], \
comments = comments)
comments = comments, \
thread_count = config['align']['threadcount'])
print("Done.")

View File

@ -97,8 +97,7 @@ def test_set_and_get(config, infos):
return
idx = random_int(config)
value = infos['random_generator'][data_type](config)
if len(element_names) > 1 :
if col.nb_elements_per_line > 1 :
elt = random.choice(element_names)
col[idx][elt] = value
assert col[idx][elt] == value, "Set value != gotten value "+str(col[idx][elt])+" != "+str(value)
@ -187,6 +186,7 @@ def create_random_column(config, infos) :
elements_names = []
for i in range(nb_elements_per_line) :
elements_names.append(random_unique_element_name(config, infos))
elements_names = random.choice([None, elements_names])
name = random_unique_name(infos)
infos['view'].add_column(name,
alias=alias,
@ -358,7 +358,9 @@ def run(config):
config['test']['elt_name_max_len'] = int((COL_COMMENTS_MAX_LEN - config['test']['maxelts']) / config['test']['maxelts'])
print("Initializing the DMS and the first view...")
shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
ini_dms_and_first_view(config, infos)
print_test(config, repr(infos['view']))

View File

@ -7,7 +7,8 @@ from .capi.obidms cimport obi_dms, \
from .capi.obidmscolumn cimport obi_close_column, \
OBIDMS_column_p, \
OBIDMS_column_header_p
OBIDMS_column_header_p, \
obi_get_elements_names
from .capi.obiutils cimport obi_format_date
@ -73,7 +74,7 @@ from .capi.obiview cimport Obiview_p, \
DEFINITION_COLUMN, \
QUALITY_COLUMN
from libc.stdlib cimport malloc
from libc.stdlib cimport malloc, free
@ -100,17 +101,17 @@ cdef class OBIDMS_column :
def __getitem__(self, index_t line_nb):
# Indexing a column with a line number is a thin alias for get_line(line_nb).
return self.get_line(line_nb)
def __len__(self):
return self.lines_used
def __len__(self): # TODO discuss
return self._view.line_count
def __sizeof__(self):
# Reported size is the sum of the header_size and data_size fields read from the column header
# (presumably both expressed in bytes -- TODO confirm against the C header definition).
return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size)
def __iter__(self):
def __iter__(self): # TODO discuss
# Declarations
cdef index_t line_nb
# Yield each line
for line_nb in range(self.lines_used):
for line_nb in range(self._view.line_count):
yield self.get_line(line_nb)
def __str__(self) :
@ -138,7 +139,12 @@ cdef class OBIDMS_column :
# elements_names property getter
@property
def elements_names(self):
return (bytes2str(((self._pointer)[0].header).elements_names)).split(';')
cdef char* elts_names_b
cdef str elts_names
elts_names_b = obi_get_elements_names((self._pointer)[0])
elts_names = bytes2str(elts_names_b)
free(<char*>elts_names_b)
return elts_names.split(';')
# nb_elements_per_line property getter
@property
@ -160,11 +166,6 @@ cdef class OBIDMS_column :
def version(self):
return ((self._pointer)[0].header).version
# lines_used property getter
@property
def lines_used(self):
return (self._pointer)[0].header.lines_used
# comments property getter
@property
def comments(self):
@ -268,15 +269,6 @@ cdef class OBIDMS_column_line :
######################################################################################################
######################################################################################################
######################################################################################################
cdef class OBIView_line :
def __init__(self, OBIView view, index_t line_nb) :
@ -424,7 +416,7 @@ cdef class OBIDMS :
cdef int i, j
cdef str column_name
view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name))
view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name), True)
view_infos_d = {}
view_infos_d["name"] = bytes2str(view_infos_p.name)
view_infos_d["comments"] = bytes2str(view_infos_p.comments)

View File

@ -22,7 +22,7 @@ cdef class OBIDMS_column_str(OBIDMS_column):
result = None
else :
result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
return result
cpdef set_line(self, index_t line_nb, object value):
@ -46,7 +46,7 @@ cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
result = None
else :
result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
return result
cpdef object get_line(self, index_t line_nb) :
@ -65,7 +65,7 @@ cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
value_in_result = None
else :
value_in_result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False

View File

@ -17,4 +17,5 @@ cdef class OBI_Taxonomy :
cdef class OBI_Taxon :
cdef ecotx_t* _pointer
cdef ecotx_t* _pointer
cdef OBI_Taxonomy _tax

View File

@ -7,10 +7,10 @@ from .capi.obitaxonomy cimport obi_read_taxonomy, \
obi_write_taxonomy, \
obi_close_taxonomy, \
obi_taxo_get_taxon_with_taxid, \
obi_taxonomy_add_local_taxon, \
obi_taxo_add_local_taxon, \
obi_taxo_add_preferred_name_with_taxon, \
ecotx_t
from ._obidms cimport OBIDMS
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
@ -42,11 +42,11 @@ cdef class OBI_Taxonomy :
if taxon_p == NULL :
raise Exception("Taxon not found")
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
return OBI_Taxon(taxon_capsule)
return OBI_Taxon(taxon_capsule, self)
else :
raise Exception("Not implemented")
def __iter__(self):
cdef ecotx_t* taxa
@ -60,7 +60,7 @@ cdef class OBI_Taxonomy :
for t in range(self._pointer.taxa.count):
taxon_p = <ecotx_t*> (taxa+t)
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
yield OBI_Taxon(taxon_capsule)
yield OBI_Taxon(taxon_capsule, self)
cpdef write(self, str prefix) :
@ -70,7 +70,7 @@ cdef class OBI_Taxonomy :
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
cdef int taxid
taxid = obi_taxonomy_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
taxid = obi_taxo_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
if taxid < 0 :
raise Exception("Error adding a new taxon to the taxonomy")
else :
@ -85,10 +85,11 @@ cdef class OBI_Taxonomy :
cdef class OBI_Taxon : # TODO dict subclass?
def __init__(self, object taxon_capsule) :
def __init__(self, object taxon_capsule, OBI_Taxonomy tax) :
self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
if self._pointer == NULL :
raise Exception("Error reading the taxonomy")
raise Exception("Error reading a taxon (NULL pointer)")
self._tax = tax
# name property getter
@property
@ -115,14 +116,25 @@ cdef class OBI_Taxon : # TODO dict subclass?
def parent(self):
cdef object parent_capsule
parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
return OBI_Taxon(parent_capsule)
return OBI_Taxon(parent_capsule, self._tax)
# preferred name property getter and setter
@property
def preferred_name(self):
# Returns the taxon's preferred name converted with bytes2str.
# When the underlying preferred_name pointer is NULL, no branch returns, so the getter yields None.
if self._pointer.preferred_name != NULL :
return bytes2str(self._pointer.preferred_name)
@preferred_name.setter
def preferred_name(self, str new_preferred_name) : # @DuplicatedSignature
# Delegates to obi_taxo_add_preferred_name_with_taxon, passing the owning taxonomy (self._tax),
# this taxon's pointer and the new name converted with str2bytes.
# A negative return value from the C call is raised as an Exception.
if (obi_taxo_add_preferred_name_with_taxon(self._tax._pointer, self._pointer, str2bytes(new_preferred_name)) < 0) :
raise Exception("Error adding a new preferred name to a taxon")
def __repr__(self):
d = {}
d['taxid'] = self.taxid
d['name'] = self.name
d['parent'] = self.parent.taxid
d['farest'] = self.farest
d['taxid'] = self.taxid
d['name'] = self.name
d['preferred name'] = self.preferred_name
d['parent'] = self.parent.taxid
d['farest'] = self.farest
return str(d)

View File

@ -0,0 +1,65 @@
../../../../src/bloom.h
../../../../src/bloom.c
../../../../src/char_str_indexer.h
../../../../src/char_str_indexer.c
../../../../src/crc64.h
../../../../src/crc64.c
../../../../src/dna_seq_indexer.h
../../../../src/dna_seq_indexer.c
../../../../src/encode.h
../../../../src/encode.c
../../../../src/hashtable.h
../../../../src/hashtable.c
../../../../src/murmurhash2.h
../../../../src/murmurhash2.c
../../../../src/obi_align.h
../../../../src/obi_align.c
../../../../src/obiavl.h
../../../../src/obiavl.c
../../../../src/obiblob_indexer.h
../../../../src/obiblob_indexer.c
../../../../src/obiblob.h
../../../../src/obiblob.c
../../../../src/obidebug.h
../../../../src/obidms_taxonomy.h
../../../../src/obidms_taxonomy.c
../../../../src/obidms.h
../../../../src/obidms.c
../../../../src/obidmscolumn_blob.c
../../../../src/obidmscolumn_blob.h
../../../../src/obidmscolumn_bool.c
../../../../src/obidmscolumn_bool.h
../../../../src/obidmscolumn_char.c
../../../../src/obidmscolumn_char.h
../../../../src/obidmscolumn_float.c
../../../../src/obidmscolumn_float.h
../../../../src/obidmscolumn_idx.h
../../../../src/obidmscolumn_idx.c
../../../../src/obidmscolumn_int.c
../../../../src/obidmscolumn_int.h
../../../../src/obidmscolumn_qual.h
../../../../src/obidmscolumn_qual.c
../../../../src/obidmscolumn_seq.c
../../../../src/obidmscolumn_seq.h
../../../../src/obidmscolumn_str.c
../../../../src/obidmscolumn_str.h
../../../../src/obidmscolumn.h
../../../../src/obidmscolumn.c
../../../../src/obidmscolumndir.h
../../../../src/obidmscolumndir.c
../../../../src/obierrno.h
../../../../src/obierrno.c
../../../../src/obilittlebigman.h
../../../../src/obilittlebigman.c
../../../../src/obitypes.h
../../../../src/obitypes.c
../../../../src/obiview.h
../../../../src/obiview.c
../../../../src/sse_banded_LCS_alignment.h
../../../../src/sse_banded_LCS_alignment.c
../../../../src/uint8_indexer.h
../../../../src/uint8_indexer.c
../../../../src/upperband.h
../../../../src/upperband.c
../../../../src/utils.h
../../../../src/utils.c

View File

@ -18,5 +18,24 @@ cdef extern from "obi_align.h" nogil:
double threshold,
bint normalize,
int reference,
bint similarity_mode)
bint similarity_mode,
int thread_count)
int obi_lcs_align_two_columns(OBIDMS_p dms,
const_char_p seq1_view_name,
const_char_p seq2_view_name,
const_char_p seq1_column_name,
const_char_p seq2_column_name,
const_char_p seq1_elt_name,
const_char_p seq2_elt_name,
const_char_p id1_column_name,
const_char_p id2_column_name,
const_char_p output_view_name,
const_char_p output_view_comments,
bint print_seq,
bint print_count,
double threshold,
bint normalize,
int reference,
bint similarity_mode);

View File

@ -0,0 +1,65 @@
../../../../src/bloom.h
../../../../src/bloom.c
../../../../src/char_str_indexer.h
../../../../src/char_str_indexer.c
../../../../src/crc64.h
../../../../src/crc64.c
../../../../src/dna_seq_indexer.h
../../../../src/dna_seq_indexer.c
../../../../src/encode.h
../../../../src/encode.c
../../../../src/hashtable.h
../../../../src/hashtable.c
../../../../src/murmurhash2.h
../../../../src/murmurhash2.c
../../../../src/obi_align.h
../../../../src/obi_align.c
../../../../src/obiavl.h
../../../../src/obiavl.c
../../../../src/obiblob_indexer.h
../../../../src/obiblob_indexer.c
../../../../src/obiblob.h
../../../../src/obiblob.c
../../../../src/obidebug.h
../../../../src/obidms_taxonomy.h
../../../../src/obidms_taxonomy.c
../../../../src/obidms.h
../../../../src/obidms.c
../../../../src/obidmscolumn_blob.c
../../../../src/obidmscolumn_blob.h
../../../../src/obidmscolumn_bool.c
../../../../src/obidmscolumn_bool.h
../../../../src/obidmscolumn_char.c
../../../../src/obidmscolumn_char.h
../../../../src/obidmscolumn_float.c
../../../../src/obidmscolumn_float.h
../../../../src/obidmscolumn_idx.h
../../../../src/obidmscolumn_idx.c
../../../../src/obidmscolumn_int.c
../../../../src/obidmscolumn_int.h
../../../../src/obidmscolumn_qual.h
../../../../src/obidmscolumn_qual.c
../../../../src/obidmscolumn_seq.c
../../../../src/obidmscolumn_seq.h
../../../../src/obidmscolumn_str.c
../../../../src/obidmscolumn_str.h
../../../../src/obidmscolumn.h
../../../../src/obidmscolumn.c
../../../../src/obidmscolumndir.h
../../../../src/obidmscolumndir.c
../../../../src/obierrno.h
../../../../src/obierrno.c
../../../../src/obilittlebigman.h
../../../../src/obilittlebigman.c
../../../../src/obitypes.h
../../../../src/obitypes.c
../../../../src/obiview.h
../../../../src/obiview.c
../../../../src/sse_banded_LCS_alignment.h
../../../../src/sse_banded_LCS_alignment.c
../../../../src/uint8_indexer.h
../../../../src/uint8_indexer.c
../../../../src/upperband.h
../../../../src/upperband.c
../../../../src/utils.h
../../../../src/utils.c

View File

@ -0,0 +1,65 @@
../../../../src/bloom.h
../../../../src/bloom.c
../../../../src/char_str_indexer.h
../../../../src/char_str_indexer.c
../../../../src/crc64.h
../../../../src/crc64.c
../../../../src/dna_seq_indexer.h
../../../../src/dna_seq_indexer.c
../../../../src/encode.h
../../../../src/encode.c
../../../../src/hashtable.h
../../../../src/hashtable.c
../../../../src/murmurhash2.h
../../../../src/murmurhash2.c
../../../../src/obi_align.h
../../../../src/obi_align.c
../../../../src/obiavl.h
../../../../src/obiavl.c
../../../../src/obiblob_indexer.h
../../../../src/obiblob_indexer.c
../../../../src/obiblob.h
../../../../src/obiblob.c
../../../../src/obidebug.h
../../../../src/obidms_taxonomy.h
../../../../src/obidms_taxonomy.c
../../../../src/obidms.h
../../../../src/obidms.c
../../../../src/obidmscolumn_blob.c
../../../../src/obidmscolumn_blob.h
../../../../src/obidmscolumn_bool.c
../../../../src/obidmscolumn_bool.h
../../../../src/obidmscolumn_char.c
../../../../src/obidmscolumn_char.h
../../../../src/obidmscolumn_float.c
../../../../src/obidmscolumn_float.h
../../../../src/obidmscolumn_idx.h
../../../../src/obidmscolumn_idx.c
../../../../src/obidmscolumn_int.c
../../../../src/obidmscolumn_int.h
../../../../src/obidmscolumn_qual.h
../../../../src/obidmscolumn_qual.c
../../../../src/obidmscolumn_seq.c
../../../../src/obidmscolumn_seq.h
../../../../src/obidmscolumn_str.c
../../../../src/obidmscolumn_str.h
../../../../src/obidmscolumn.h
../../../../src/obidmscolumn.c
../../../../src/obidmscolumndir.h
../../../../src/obidmscolumndir.c
../../../../src/obierrno.h
../../../../src/obierrno.c
../../../../src/obilittlebigman.h
../../../../src/obilittlebigman.c
../../../../src/obitypes.h
../../../../src/obitypes.c
../../../../src/obiview.h
../../../../src/obiview.c
../../../../src/sse_banded_LCS_alignment.h
../../../../src/sse_banded_LCS_alignment.c
../../../../src/uint8_indexer.h
../../../../src/uint8_indexer.c
../../../../src/upperband.h
../../../../src/upperband.c
../../../../src/utils.h
../../../../src/utils.c

View File

@ -47,31 +47,8 @@ cdef extern from "obidmscolumn.h" nogil:
bint writable
ctypedef OBIDMS_column_t* OBIDMS_column_p
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const_char_p column_name,
OBIType_t type,
index_t nb_lines,
index_t nb_elements_per_line,
const_char_p elements_names,
const_char_p indexer_name,
const_char_p associated_colum_name,
obiversion_t associated_colum_version,
const_char_p comments)
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
const_char_p column_name,
obiversion_t version_number)
int obi_close_column(OBIDMS_column_p column)
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
OBIDMS_column_p line_selection,
const_char_p column_name,
obiversion_t version_number,
bint clone_data)
int obi_close_column(OBIDMS_column_p column)
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms,
const_char_p column_name)
@ -81,9 +58,9 @@ cdef extern from "obidmscolumn.h" nogil:
obiversion_t version_number)
int obi_close_header(OBIDMS_column_header_p header)
int obi_select(OBIDMS_column_p line_selection_column, index_t line_to_grep)
char* obi_get_elements_names(OBIDMS_column_p column)
cdef extern from "obidmscolumn_int.h" nogil:

View File

@ -0,0 +1,65 @@
../../../../src/bloom.h
../../../../src/bloom.c
../../../../src/char_str_indexer.h
../../../../src/char_str_indexer.c
../../../../src/crc64.h
../../../../src/crc64.c
../../../../src/dna_seq_indexer.h
../../../../src/dna_seq_indexer.c
../../../../src/encode.h
../../../../src/encode.c
../../../../src/hashtable.h
../../../../src/hashtable.c
../../../../src/murmurhash2.h
../../../../src/murmurhash2.c
../../../../src/obi_align.h
../../../../src/obi_align.c
../../../../src/obiavl.h
../../../../src/obiavl.c
../../../../src/obiblob_indexer.h
../../../../src/obiblob_indexer.c
../../../../src/obiblob.h
../../../../src/obiblob.c
../../../../src/obidebug.h
../../../../src/obidms_taxonomy.h
../../../../src/obidms_taxonomy.c
../../../../src/obidms.h
../../../../src/obidms.c
../../../../src/obidmscolumn_blob.c
../../../../src/obidmscolumn_blob.h
../../../../src/obidmscolumn_bool.c
../../../../src/obidmscolumn_bool.h
../../../../src/obidmscolumn_char.c
../../../../src/obidmscolumn_char.h
../../../../src/obidmscolumn_float.c
../../../../src/obidmscolumn_float.h
../../../../src/obidmscolumn_idx.h
../../../../src/obidmscolumn_idx.c
../../../../src/obidmscolumn_int.c
../../../../src/obidmscolumn_int.h
../../../../src/obidmscolumn_qual.h
../../../../src/obidmscolumn_qual.c
../../../../src/obidmscolumn_seq.c
../../../../src/obidmscolumn_seq.h
../../../../src/obidmscolumn_str.c
../../../../src/obidmscolumn_str.h
../../../../src/obidmscolumn.h
../../../../src/obidmscolumn.c
../../../../src/obidmscolumndir.h
../../../../src/obidmscolumndir.c
../../../../src/obierrno.h
../../../../src/obierrno.c
../../../../src/obilittlebigman.h
../../../../src/obilittlebigman.c
../../../../src/obitypes.h
../../../../src/obitypes.c
../../../../src/obiview.h
../../../../src/obiview.c
../../../../src/sse_banded_LCS_alignment.h
../../../../src/sse_banded_LCS_alignment.c
../../../../src/uint8_indexer.h
../../../../src/uint8_indexer.c
../../../../src/upperband.h
../../../../src/upperband.c
../../../../src/utils.h
../../../../src/utils.c

View File

@ -0,0 +1,65 @@
../../../../src/bloom.h
../../../../src/bloom.c
../../../../src/char_str_indexer.h
../../../../src/char_str_indexer.c
../../../../src/crc64.h
../../../../src/crc64.c
../../../../src/dna_seq_indexer.h
../../../../src/dna_seq_indexer.c
../../../../src/encode.h
../../../../src/encode.c
../../../../src/hashtable.h
../../../../src/hashtable.c
../../../../src/murmurhash2.h
../../../../src/murmurhash2.c
../../../../src/obi_align.h
../../../../src/obi_align.c
../../../../src/obiavl.h
../../../../src/obiavl.c
../../../../src/obiblob_indexer.h
../../../../src/obiblob_indexer.c
../../../../src/obiblob.h
../../../../src/obiblob.c
../../../../src/obidebug.h
../../../../src/obidms_taxonomy.h
../../../../src/obidms_taxonomy.c
../../../../src/obidms.h
../../../../src/obidms.c
../../../../src/obidmscolumn_blob.c
../../../../src/obidmscolumn_blob.h
../../../../src/obidmscolumn_bool.c
../../../../src/obidmscolumn_bool.h
../../../../src/obidmscolumn_char.c
../../../../src/obidmscolumn_char.h
../../../../src/obidmscolumn_float.c
../../../../src/obidmscolumn_float.h
../../../../src/obidmscolumn_idx.h
../../../../src/obidmscolumn_idx.c
../../../../src/obidmscolumn_int.c
../../../../src/obidmscolumn_int.h
../../../../src/obidmscolumn_qual.h
../../../../src/obidmscolumn_qual.c
../../../../src/obidmscolumn_seq.c
../../../../src/obidmscolumn_seq.h
../../../../src/obidmscolumn_str.c
../../../../src/obidmscolumn_str.h
../../../../src/obidmscolumn.h
../../../../src/obidmscolumn.c
../../../../src/obidmscolumndir.h
../../../../src/obidmscolumndir.c
../../../../src/obierrno.h
../../../../src/obierrno.c
../../../../src/obilittlebigman.h
../../../../src/obilittlebigman.c
../../../../src/obitypes.h
../../../../src/obitypes.c
../../../../src/obiview.h
../../../../src/obiview.c
../../../../src/sse_banded_LCS_alignment.h
../../../../src/sse_banded_LCS_alignment.c
../../../../src/uint8_indexer.h
../../../../src/uint8_indexer.c
../../../../src/upperband.h
../../../../src/upperband.c
../../../../src/utils.h
../../../../src/utils.c

View File

@ -13,7 +13,8 @@ cdef extern from "obidms_taxonomy.h" nogil:
int32_t farest
ecotxnode* parent
char* name
char* preferred_name
ctypedef ecotxnode ecotx_t
@ -56,4 +57,9 @@ cdef extern from "obidms_taxonomy.h" nogil:
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)

View File

@ -0,0 +1,65 @@
../../../../src/bloom.h
../../../../src/bloom.c
../../../../src/char_str_indexer.h
../../../../src/char_str_indexer.c
../../../../src/crc64.h
../../../../src/crc64.c
../../../../src/dna_seq_indexer.h
../../../../src/dna_seq_indexer.c
../../../../src/encode.h
../../../../src/encode.c
../../../../src/hashtable.h
../../../../src/hashtable.c
../../../../src/murmurhash2.h
../../../../src/murmurhash2.c
../../../../src/obi_align.h
../../../../src/obi_align.c
../../../../src/obiavl.h
../../../../src/obiavl.c
../../../../src/obiblob_indexer.h
../../../../src/obiblob_indexer.c
../../../../src/obiblob.h
../../../../src/obiblob.c
../../../../src/obidebug.h
../../../../src/obidms_taxonomy.h
../../../../src/obidms_taxonomy.c
../../../../src/obidms.h
../../../../src/obidms.c
../../../../src/obidmscolumn_blob.c
../../../../src/obidmscolumn_blob.h
../../../../src/obidmscolumn_bool.c
../../../../src/obidmscolumn_bool.h
../../../../src/obidmscolumn_char.c
../../../../src/obidmscolumn_char.h
../../../../src/obidmscolumn_float.c
../../../../src/obidmscolumn_float.h
../../../../src/obidmscolumn_idx.h
../../../../src/obidmscolumn_idx.c
../../../../src/obidmscolumn_int.c
../../../../src/obidmscolumn_int.h
../../../../src/obidmscolumn_qual.h
../../../../src/obidmscolumn_qual.c
../../../../src/obidmscolumn_seq.c
../../../../src/obidmscolumn_seq.h
../../../../src/obidmscolumn_str.c
../../../../src/obidmscolumn_str.h
../../../../src/obidmscolumn.h
../../../../src/obidmscolumn.c
../../../../src/obidmscolumndir.h
../../../../src/obidmscolumndir.c
../../../../src/obierrno.h
../../../../src/obierrno.c
../../../../src/obilittlebigman.h
../../../../src/obilittlebigman.c
../../../../src/obitypes.h
../../../../src/obitypes.c
../../../../src/obiview.h
../../../../src/obiview.c
../../../../src/sse_banded_LCS_alignment.h
../../../../src/sse_banded_LCS_alignment.c
../../../../src/uint8_indexer.h
../../../../src/uint8_indexer.c
../../../../src/upperband.h
../../../../src/upperband.c
../../../../src/utils.h
../../../../src/utils.c

View File

@ -0,0 +1,65 @@
../../../../src/bloom.h
../../../../src/bloom.c
../../../../src/char_str_indexer.h
../../../../src/char_str_indexer.c
../../../../src/crc64.h
../../../../src/crc64.c
../../../../src/dna_seq_indexer.h
../../../../src/dna_seq_indexer.c
../../../../src/encode.h
../../../../src/encode.c
../../../../src/hashtable.h
../../../../src/hashtable.c
../../../../src/murmurhash2.h
../../../../src/murmurhash2.c
../../../../src/obi_align.h
../../../../src/obi_align.c
../../../../src/obiavl.h
../../../../src/obiavl.c
../../../../src/obiblob_indexer.h
../../../../src/obiblob_indexer.c
../../../../src/obiblob.h
../../../../src/obiblob.c
../../../../src/obidebug.h
../../../../src/obidms_taxonomy.h
../../../../src/obidms_taxonomy.c
../../../../src/obidms.h
../../../../src/obidms.c
../../../../src/obidmscolumn_blob.c
../../../../src/obidmscolumn_blob.h
../../../../src/obidmscolumn_bool.c
../../../../src/obidmscolumn_bool.h
../../../../src/obidmscolumn_char.c
../../../../src/obidmscolumn_char.h
../../../../src/obidmscolumn_float.c
../../../../src/obidmscolumn_float.h
../../../../src/obidmscolumn_idx.h
../../../../src/obidmscolumn_idx.c
../../../../src/obidmscolumn_int.c
../../../../src/obidmscolumn_int.h
../../../../src/obidmscolumn_qual.h
../../../../src/obidmscolumn_qual.c
../../../../src/obidmscolumn_seq.c
../../../../src/obidmscolumn_seq.h
../../../../src/obidmscolumn_str.c
../../../../src/obidmscolumn_str.h
../../../../src/obidmscolumn.h
../../../../src/obidmscolumn.c
../../../../src/obidmscolumndir.h
../../../../src/obidmscolumndir.c
../../../../src/obierrno.h
../../../../src/obierrno.c
../../../../src/obilittlebigman.h
../../../../src/obilittlebigman.c
../../../../src/obitypes.h
../../../../src/obitypes.c
../../../../src/obiview.h
../../../../src/obiview.c
../../../../src/sse_banded_LCS_alignment.h
../../../../src/sse_banded_LCS_alignment.c
../../../../src/uint8_indexer.h
../../../../src/uint8_indexer.c
../../../../src/upperband.h
../../../../src/upperband.c
../../../../src/utils.h
../../../../src/utils.c

View File

@ -0,0 +1,65 @@
../../../../src/bloom.h
../../../../src/bloom.c
../../../../src/char_str_indexer.h
../../../../src/char_str_indexer.c
../../../../src/crc64.h
../../../../src/crc64.c
../../../../src/dna_seq_indexer.h
../../../../src/dna_seq_indexer.c
../../../../src/encode.h
../../../../src/encode.c
../../../../src/hashtable.h
../../../../src/hashtable.c
../../../../src/murmurhash2.h
../../../../src/murmurhash2.c
../../../../src/obi_align.h
../../../../src/obi_align.c
../../../../src/obiavl.h
../../../../src/obiavl.c
../../../../src/obiblob_indexer.h
../../../../src/obiblob_indexer.c
../../../../src/obiblob.h
../../../../src/obiblob.c
../../../../src/obidebug.h
../../../../src/obidms_taxonomy.h
../../../../src/obidms_taxonomy.c
../../../../src/obidms.h
../../../../src/obidms.c
../../../../src/obidmscolumn_blob.c
../../../../src/obidmscolumn_blob.h
../../../../src/obidmscolumn_bool.c
../../../../src/obidmscolumn_bool.h
../../../../src/obidmscolumn_char.c
../../../../src/obidmscolumn_char.h
../../../../src/obidmscolumn_float.c
../../../../src/obidmscolumn_float.h
../../../../src/obidmscolumn_idx.h
../../../../src/obidmscolumn_idx.c
../../../../src/obidmscolumn_int.c
../../../../src/obidmscolumn_int.h
../../../../src/obidmscolumn_qual.h
../../../../src/obidmscolumn_qual.c
../../../../src/obidmscolumn_seq.c
../../../../src/obidmscolumn_seq.h
../../../../src/obidmscolumn_str.c
../../../../src/obidmscolumn_str.h
../../../../src/obidmscolumn.h
../../../../src/obidmscolumn.c
../../../../src/obidmscolumndir.h
../../../../src/obidmscolumndir.c
../../../../src/obierrno.h
../../../../src/obierrno.c
../../../../src/obilittlebigman.h
../../../../src/obilittlebigman.c
../../../../src/obitypes.h
../../../../src/obitypes.c
../../../../src/obiview.h
../../../../src/obiview.c
../../../../src/sse_banded_LCS_alignment.h
../../../../src/sse_banded_LCS_alignment.c
../../../../src/uint8_indexer.h
../../../../src/uint8_indexer.c
../../../../src/upperband.h
../../../../src/upperband.c
../../../../src/utils.h
../../../../src/utils.c

View File

@ -68,7 +68,7 @@ cdef extern from "obiview.h" nogil:
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column)
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bint finished)
int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
@ -94,11 +94,7 @@ cdef extern from "obiview.h" nogil:
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
int obi_save_view(Obiview_p view)
int obi_close_view(Obiview_p view)
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
int obi_save_and_close_view(Obiview_p view)

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,12 @@
/****************************************************************************
* Sequence alignment functions header file *
* LCS sequence alignment functions header file *
****************************************************************************/
/**
* @file obi_align.h
* @author Celine Mercier
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date May 11th 2016
* @brief Header file for the functions handling the alignment of DNA sequences.
* @brief Header file for the functions handling the LCS alignment of DNA sequences.
*/
@ -55,7 +55,7 @@
/**
* @brief Aligns a NUC_SEQ column with itself.
* @brief Aligns an OBI_SEQ column with itself.
*
* Note: The columns where the results are written are automatically named and created.
*
@ -77,7 +77,7 @@
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
* @param normalize Whether the score should be normalized with the reference sequence length.
* @param reference The reference length. 0: The alignement length; 1: The longest sequence's length; 2: The shortest sequence's length.
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
*
* @returns A value indicating the success of the operation.
@ -92,18 +92,64 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
const char* id_column_name,
const char* output_view_name, const char* output_view_comments,
bool print_seq, bool print_count,
double threshold, bool normalize, int reference, bool similarity_mode);
double threshold, bool normalize, int reference, bool similarity_mode,
int thread_count);
/**
* @brief
* @brief Aligns two OBI_SEQ columns.
*
* TODO
* The columns must belong to the same OBIDMS, but can belong to different views.
*
* Note: The columns where the results are written are automatically named and created.
*
* @param dms A pointer on an OBIDMS.
* @param seq1_view_name The name of the view where the first column to align is.
* @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one).
* @param seq1_column_name The name of the first OBI_SEQ column in the input view to align.
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
* @param seq2_column_name The name of the second OBI_SEQ column in the input view to align.
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
* @param seq1_elt_name The name of the element in the first column corresponding to the sequence to align, if the column has multiple
* elements per line.
* @param seq2_elt_name The name of the element in the second column corresponding to the sequence to align, if the column has multiple
* elements per line.
* @param id1_column_name The name of the column in the first input view containing the identifiers of the first sequence to align.
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
* @param id2_column_name The name of the column in the second input view containing the identifiers of the second sequence to align.
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
* @param output_view_name The name of the output view where the results should be written (should not already exist).
* @param output_view_comments The comments that should be associated with the output view.
* @param print_seq A boolean indicating whether the aligned sequences should be copied in the output view.
* @param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view.
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
* @param normalize Whether the score should be normalized with the reference sequence length.
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,
// Obiview_p score_view, OBIDMS_column_p score_column,
// double threshold, bool normalize, int reference, bool similarity_mode);
int obi_lcs_align_two_columns(OBIDMS_p dms,
const char* seq1_view_name,
const char* seq2_view_name,
const char* seq1_column_name,
const char* seq2_column_name,
const char* seq1_elt_name,
const char* seq2_elt_name,
const char* id1_column_name,
const char* id2_column_name,
const char* output_view_name, const char* output_view_comments,
bool print_seq, bool print_count,
double threshold, bool normalize, int reference, bool similarity_mode);
#endif /* OBI_ALIGN_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
* @file obidms_taxonomy.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date March 2nd 2016
* @brief Header file for the functions handling the reading of binary taxonomy files.
* @brief Header file for the functions handling the reading and writing of taxonomy files.
*/
@ -17,105 +17,384 @@
#include "obidms.h"
#define MIN_LOCAL_TAXID (10000000)
#define TAX_NAME_LEN (1024)
#define MIN_LOCAL_TAXID (10000000) /**< The minimum taxid for a taxon added locally (i.e. not an NCBI taxon).
*/
#define TAX_NAME_LEN (1024) /**< The maximum length for the taxonomy name.
*/
/**
* @brief Structure for a taxon as stored in a .tdx file.
*/
typedef struct {
int32_t taxid;
int32_t rank;
int32_t parent;
int32_t name_length;
char name[1];
int32_t taxid; /**< Taxid.
*/
int32_t rank; /**< Rank index.
*/
int32_t parent; /**< Index, in the taxid index, of the parent node in the taxonomic tree.
*/
int32_t name_length; /**< Length of the taxon scientific name.
*/
char name[]; /**< Scientific name of the taxon.
*/
} ecotxformat_t;
/**
* @brief Structure for a taxon as stored in a taxonomy structure.
*/
typedef struct ecotxnode {
int32_t taxid;
int32_t rank;
int32_t farest;
int32_t idx;
struct ecotxnode* parent;
char* name;
bool local;
int32_t taxid; /**< Taxid. // TODO discuss that this will be the current taxid even if the struct was accessed through a deprecated one
*/
int32_t rank; /**< Rank index in ecorankidx_t structure.
*/
int32_t farest; /**< Longest branch length, used to compute distances between taxa faster.
*/
int32_t idx; /**< Index in the ecotxidx_t structure.
*/
struct ecotxnode* parent; /**< Pointer on the parent node in the taxonomic tree.
*/
char* name; /**< Scientific name of the taxon.
*/
char* preferred_name; /**< Preferred name of the taxon if there is one, otherwise NULL.
*/
bool local; /**< A boolean indicating whether the taxon is local or not.
*/
} ecotx_t;
/**
* @brief Structure for the taxon index in a taxonomy structure.
*/
typedef struct {
int32_t count;
int32_t ncbi_count;
int32_t local_count;
int32_t max_taxid;
int32_t buffer_size;
ecotx_t taxon[1];
int32_t count; /**< Number of taxa.
*/
int32_t ncbi_count; /**< Number of NCBI taxa.
*/
int32_t local_count; /**< Number of taxa added locally.
*/
int32_t max_taxid; /**< Maximum taxid existing in the taxon index.
*/
int32_t buffer_size; /**< Number of taxa. // TODO kept this but not sure of its use
*/
ecotx_t taxon[]; /**< Taxon array.
*/
} ecotxidx_t;
/**
* @brief Structure for the rank index in a taxonomy structure.
*/
typedef struct {
int32_t count;
char* label[1];
int32_t count; /**< Number of ranks.
*/
char* label[]; /**< Array of rank names.
*/
} ecorankidx_t;
/**
* @brief Structure for a taxon name as stored in a .ndx file.
*/
typedef struct {
int32_t is_scientific_name;
int32_t name_length;
int32_t class_length;
int32_t taxid; // taxid idx
char names[1];
int32_t is_scientific_name; /**< A boolean indicating whether the name is a scientific name or not.
*/
int32_t name_length; /**< The name length.
*/
int32_t class_length; /**< The name class length.
*/
int32_t taxid; /**< Index of the taxon in the taxid index.
*/
char names[]; /**< Taxon name and name class concatenated.
*/
} econameformat_t;
/**
* @brief Structure for a taxon name as stored in a taxonomy structure.
*/
typedef struct {
char* name;
char* class_name;
int32_t is_scientific_name;
struct ecotxnode* taxon;
char* name; /**< Taxon name.
*/
char* class_name; /**< Name class.
*/
int32_t is_scientific_name; /**< A boolean indicating whether the name is a scientific name or not.
*/
struct ecotxnode* taxon; /**< Pointer on the taxon in the taxon index.
*/
} econame_t;
/**
* @brief Structure for the name index in a taxonomy structure.
*/
typedef struct {
int32_t count;
econame_t names[1];
int32_t count; /**< Number of names.
*/
econame_t names[]; /**< Array of names.
*/
} econameidx_t;
/**
* @brief Structure for a taxid/index pair as stored in a taxonomy structure.
*/
typedef struct {
int32_t taxid; /**< Taxid.
*/
int32_t idx; /**< Index of the taxid in the taxon index, -1 if the taxid is deprecated.
*/
} ecomerged_t;
/**
* @brief Structure for a merged taxid index in a taxonomy structure.
*
* This index includes all deprecated taxids that now refer to different taxids, and
* the deprecated taxids that are deleted.
*
*/
typedef struct {
int32_t count; /**< Number of taxid/index pairs.
*/
ecomerged_t merged[]; /**< Array of taxid/index pairs.
*/
} ecomergedidx_t;
/**
* @brief Structure for a taxonomy.
*/
typedef struct OBIDMS_taxonomy_t {
char tax_name[TAX_NAME_LEN];
OBIDMS_p dms;
ecorankidx_t* ranks;
econameidx_t* names;
ecotxidx_t* taxa;
char tax_name[TAX_NAME_LEN]; /**< Taxonomy name.
*/
OBIDMS_p dms; /**< A pointer on the DMS to which the taxonomy belongs.
*/
ecomergedidx_t* merged_idx; /**< Merged taxid index.
*/
ecorankidx_t* ranks; /**< Taxonomic ranks.
*/
econameidx_t* names; /**< Taxon names.
*/
econameidx_t* preferred_names; /**< Taxon preferred names (i.e. added locally).
*/
ecotxidx_t* taxa; /**< Taxa.
*/
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
/**
* @brief Function reading an NCBI taxdump and loading its information into a taxonomy structure.
*
* @param taxdump The path to the taxdump directory.
*
* @returns A pointer on the read taxonomy structure.
* @retval NULL if an error occurred.
*
* @since 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
/**
* @brief Function reading a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files)
* and loading its information into a taxonomy structure.
*
* @param dms A pointer on the DMS to which the taxonomy belongs.
* @param taxonomy_name The name (prefix) of the taxonomy.
* @param read_alternative_names A boolean indicating whether names other than scientific and preferred names should be read.
*
* @returns A pointer on the read taxonomy structure.
* @retval NULL if an error occurred.
*
* @since 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
/**
* @brief Function writing a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files).
*
* @param dms A pointer on the DMS to which the taxonomy belongs.
* @param tax A pointer on the taxonomy structure.
* @param tax_name The name (prefix) of the taxonomy.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
/**
* @brief Function closing a taxonomy structure.
*
* This function writes all changes to the binary files (local taxa and preferred names) and frees all allocated memory for the structure.
*
* @param taxonomy A pointer on the taxonomy structure.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
/**
* @brief Function adding a local taxon to a taxonomy.
*
* @param tax A pointer on the taxonomy structure.
* @param name The taxon scientific name.
* @param rank_name The taxon rank name.
* @param parent_taxid The taxid of the parent node in the taxonomic tree.
* @param min_taxid The minimum taxid to give to the new taxon (the function will choose a new taxid >= min_taxid and >= MIN_LOCAL_TAXID).
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
/**
* @brief Function adding a preferred name to a taxon in a taxonomy, referred to by its taxid.
*
* @param tax A pointer on the taxonomy structure.
* @param taxid The taxid of the taxon that should have a new preferred name.
* @param preferred_name The new preferred name.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
/**
* @brief Function adding a preferred name to a taxon in a taxonomy, referred to by the taxon pointer.
*
* @param tax A pointer on the taxonomy structure.
* @param taxon A pointer on the taxon that should have a new preferred name.
* @param preferred_name The new preferred name.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);
/**
* @brief Function returning the parent of a taxon at a given rank.
*
* @param taxon A pointer on the taxon.
* @param rankidx The index of the rank wanted.
*
* @returns A pointer on the parent taxon at the wanted rank.
* @retval NULL if no parent taxon was found at the wanted rank.
*/
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
/**
* @brief Function returning a taxon given its taxid.
*
* @param taxonomy A pointer on the taxonomy.
* @param taxid The taxid of the taxon.
*
* @returns A pointer on the wanted taxon.
* @retval NULL if no taxon was found with the given taxid.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
/**
* @brief Function checking whether a taxon is under another in the taxonomy tree.
*
* @param taxon A pointer on the first taxon.
* @param other_taxid The taxid of the second taxon.
*
* @returns A boolean indicating whether the first taxon is under the second taxon in the taxonomy tree.
*/
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
/**
* @brief Function returning the parent of a taxon at the species level.
*
* @param taxon A pointer on the taxon.
* @param taxonomy A pointer on the taxonomy structure.
*
* @returns A pointer on the parent taxon at the species level.
* @retval NULL if no parent taxon was found at the wanted rank.
*/
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
/**
* @brief Function returning the parent of a taxon at the genus level.
*
* @param taxon A pointer on the taxon.
* @param taxonomy A pointer on the taxonomy structure.
*
* @returns A pointer on the parent taxon at the genus level.
* @retval NULL if no parent taxon was found at the wanted rank.
*/
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
/**
* @brief Function returning the parent of a taxon at the family level.
*
* @param taxon A pointer on the taxon.
* @param taxonomy A pointer on the taxonomy structure.
*
* @returns A pointer on the parent taxon at the family level.
* @retval NULL if no parent taxon was found at the wanted rank.
*/
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
/**
* @brief Function returning the parent of a taxon at the kingdom level.
*
* @param taxon A pointer on the taxon.
* @param taxonomy A pointer on the taxonomy structure.
*
* @returns A pointer on the parent taxon at the kingdom level.
* @retval NULL if no parent taxon was found at the wanted rank.
*/
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
/**
* @brief Function returning the parent of a taxon at the superkingdom level.
*
* @param taxon A pointer on the taxon.
* @param taxonomy A pointer on the taxonomy structure.
*
* @returns A pointer on the parent taxon at the superkingdom level.
* @retval NULL if no parent taxon was found at the wanted rank.
*/
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);

View File

@ -119,7 +119,7 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
/**
* @brief Internal function building the default elements names of the lines of a
* column (i.e. "0;1;2;...;n").
* column, with ';' as separator (i.e. "0;1;2;...;n\0").
*
* @warning The returned pointer has to be freed by the caller.
*
@ -134,12 +134,61 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
static char* build_default_elements_names(index_t nb_elements_per_line);
/**
* @brief Internal function formatting the elements names of the lines of a
* column with '\0' as separator (e.g. "0\01\02\0...\0n\0").
*
* @param elements_names The character string formatted with ';' as separator (e.g. "0;1;2;...;n\0").
* @param elts_names_length A pointer on an integer where the function will store the length of the character string.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static void format_elements_names(char* elements_names, int* elts_names_length);
/**
* @brief Internal function comparing two element names using their sorted index, using data stored in the column header.
*
* @param n1_sort_idx A pointer on the sorted index of the first name.
* @param n2_sort_idx A pointer on the sorted index of the second name.
* @param h A pointer on the column header.
*
* @returns A value < 0 if name1 < name2,
* a value > 0 if name1 > name2,
* and 0 if name1 == name2.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h);
/**
* @brief Internal function comparing two element names using a pointer on the first name and the sorted index of the second name,
* using data stored in the column header.
*
* @param name1 A pointer on the first name.
* @param n2_sort_idx A pointer on the sorted index of the second name.
* @param h A pointer on the column header.
*
* @returns A value < 0 if name1 < name2,
* a value > 0 if name1 > name2,
* and 0 if name1 == name2.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h);
/**
* @brief Internal function setting the elements names of the lines of a
* column in the header of the OBIDMS column structure.
*
* @param column A pointer as returned by obi_create_column().
* @param elements_names The names of the elements with ';' as separator.
* @param elements_names The names of the elements as formatted by format_elements_names().
* @param elts_names_length The length of elements_names.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
@ -147,7 +196,35 @@ static char* build_default_elements_names(index_t nb_elements_per_line);
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names);
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length);
/**
* @brief Internal function counting the number of elements names in a character array.
*
* @param elements_names A pointer on the character string corresponding to the elements names,
* formatted with ';' or with '\0' as separator.
* @param elt_names_formatted Whether the separator is ';' (false), or '\0' (true, as formatted by format_elements_names()).
*
* @returns The number of elements names in the character array.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted);
/**
* @brief Internal function computing the length of a character array containing elements names as formatted by format_elements_names().
*
* @param elements_names A pointer on the character string corresponding to the elements names as formatted by format_elements_names().
*
* @returns The length of the character array, not counting the two terminal '\0' characters.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int get_formatted_elt_names_length(const char* elements_names);
/**
@ -198,6 +275,7 @@ static char* build_column_file_name(const char* column_name, obiversion_t versio
}
static char* build_version_file_name(const char* column_name)
{
char* file_name;
@ -222,6 +300,7 @@ static char* build_version_file_name(const char* column_name)
}
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block)
{
off_t loc_size;
@ -346,6 +425,7 @@ static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_
}
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory)
{
off_t loc_size;
@ -437,10 +517,12 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
}
static char* build_default_elements_names(index_t nb_elements_per_line)
{
char* elements_names;
int i;
int len;
elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
if (elements_names == NULL)
@ -457,31 +539,169 @@ static char* build_default_elements_names(index_t nb_elements_per_line)
return NULL;
}
for (i= 0; i < nb_elements_per_line; i++)
sprintf(elements_names, "%d", i);
len = 0;
for (i = 0; i < nb_elements_per_line; i++)
len += sprintf(elements_names+len, "%d;", i);
// Terminal character
elements_names[strlen(elements_names)] = '\0';
elements_names[len-1] = '\0'; // -1 to delete last ';'
len--;
return elements_names;
}
int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names)
// Converts elements names from the ';'-separated form to the '\0'-separated form, in place,
// and stores the length of the character string in *elts_names_length.
static void format_elements_names(char* elements_names, int* elts_names_length)
{
if (strlen(elements_names) > ELEMENTS_NAMES_MAX)
int i;
// The length is measured before any ';' is replaced, while the whole
// array is still a single C string
*elts_names_length = strlen(elements_names);
// Replace the ';' with '\0'
for (i=0; i < *elts_names_length; i++)
{
if (elements_names[i] == ';')
elements_names[i] = '\0';
}
}
// Comparison callback working on two entries of the sorted elements index:
// each entry is resolved to its element name through the column header,
// and the two names are compared with strcmp().
static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h)
{
    OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
    int   first_sort_idx  = *((int*) n1_sort_idx);
    int   second_sort_idx = *((int*) n2_sort_idx);
    int   first_offset;
    int   second_offset;
    char* first_name  = NULL;
    char* second_name = NULL;

    // Offset of each name in the elements names array of the header
    first_offset  = (header->elements_names_idx)[first_sort_idx];
    second_offset = (header->elements_names_idx)[second_sort_idx];

    // Start of each name
    first_name  = (header->elements_names) + first_offset;
    second_name = (header->elements_names) + second_offset;

    return strcmp(first_name, second_name);
}
// Comparison callback taking a name on one side and an entry of the sorted
// elements index on the other: the entry is resolved to its element name
// through the column header, then both names are compared with strcmp().
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h)
{
    OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
    int   second_sort_idx = *((int*) n2_sort_idx);
    int   second_offset;
    char* second_name = NULL;

    // Offset of the second name in the elements names array of the header
    second_offset = (header->elements_names_idx)[second_sort_idx];
    second_name   = (header->elements_names) + second_offset;

    return strcmp(name1, second_name);
}
// Stores the formatted elements names in the column header, together with their length,
// and builds the two index arrays of the header (elements_names_idx and sorted_elements_idx).
// Returns 0 on success, -1 if the names do not fit in ELEMENTS_NAMES_MAX.
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length)
{
int i, j;
// Check that the elements names are not too long
// (+2 for the two terminal '\0' written after the copy below)
if (elts_names_length+2 > ELEMENTS_NAMES_MAX)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX);
return -1;
}
strcpy((column->header)->elements_names, elements_names);
// Copy the elements names in the header
// (explicit length: the names are separated by '\0', see the indexing loop below)
memcpy((column->header)->elements_names, elements_names, elts_names_length*sizeof(char));
// Terminal characters
(column->header)->elements_names[elts_names_length] = '\0';
(column->header)->elements_names[elts_names_length + 1] = '\0';
// Store the length of the character array containing the elements names
(column->header)->elements_names_length = elts_names_length;
// Build the elements names index
// i: position in elements_names; j: number of names indexed so far
i = 0;
j = 0;
// Index the first element name
((column->header)->elements_names_idx)[j] = i;
((column->header)->sorted_elements_idx)[j] = j;
i++;
j++;
while (i < elts_names_length)
{
if (elements_names[i] == '\0')
{ // Index new element name
// (the new name starts right after the separator, hence i+1)
((column->header)->elements_names_idx)[j] = i+1;
((column->header)->sorted_elements_idx)[j] = j;
j++;
}
i++;
}
// Build the sorted index
// (sorted_elements_idx holds 0..j-1 at this point and is reordered by the sort,
// with the header passed as user data to cmp_elements_names_with_idx)
qsort_user_data((column->header)->sorted_elements_idx, j, sizeof(int), column->header, cmp_elements_names_with_idx);
return 0;
}
index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
// Counts the elements names in a character array: every separator and every
// '\0' met before the end of the array counts for one name. The end is a single
// '\0' for the ';'-separated form, and two consecutive '\0' for the formatted form.
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted)
{
    const char separator = elt_names_formatted ? FORMATTED_ELT_NAMES_SEPARATOR : NOT_FORMATTED_ELT_NAMES_SEPARATOR;
    index_t    nb_names  = 0;
    int        pos;

    for (pos = 0; ; pos++)
    {
        bool at_end;

        // End of the array reached?
        if (elt_names_formatted)
            at_end = (elements_names[pos] == '\0') && (elements_names[pos+1] == '\0');
        else
            at_end = (elements_names[pos] == '\0');

        // A separator or a '\0' closes a name
        if ((elements_names[pos] == separator) || (elements_names[pos] == '\0'))
            nb_names++;

        if (at_end)
            break;
    }

    return nb_names;
}
// Returns the position of the first of the two consecutive '\0' ending
// a formatted elements names array, i.e. the length of the array without
// its terminal characters.
static int get_formatted_elt_names_length(const char* elements_names)
{
    int length = 0;

    // Advance until the current character and the next one are both '\0'
    while ((elements_names[length] != '\0') || (elements_names[length+1] != '\0'))
        length++;

    return length;
}
// Returns the number of lines fitting in one memory page: the value returned by getpagesize()
// divided by the size of one line (obi_sizeof(data_type) * nb_elements_per_line).
// NOTE(review): nb_elements_per_line == 0 would make the divisor 0 -- confirm callers never pass 0.
static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
{
return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line);
}
@ -493,6 +713,7 @@ index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_lin
*
**********************************************************************/
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory)
{
off_t loc_size;
@ -557,6 +778,7 @@ obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_dire
}
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name)
{
OBIDMS_column_directory_p column_directory;
@ -582,6 +804,7 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c
}
size_t obi_get_platform_header_size()
{
size_t header_size;
@ -607,7 +830,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const char* indexer_name,
const char* associated_column_name,
obiversion_t associated_column_version,
const char* comments
const char* comments,
bool elt_names_formatted
)
{
OBIDMS_column_p new_column;
@ -623,6 +847,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
OBIType_t returned_data_type;
OBIType_t stored_data_type;
char* final_indexer_name;
char* built_elements_names = NULL;
int elts_names_length;
new_column = NULL;
@ -695,31 +921,29 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
else if (nb_lines < minimum_line_count)
nb_lines = minimum_line_count;
// Check and build if needed the element names
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0)) // Build the default element names: str of the element index
// Check, format, and build if needed the element names
if ((elements_names == NULL) || (*elements_names == '\0')) // Build the default element names: str of the element index
{
elements_names = build_default_elements_names(nb_elements_per_line);
if (elements_names == NULL)
built_elements_names = build_default_elements_names(nb_elements_per_line);
if (built_elements_names == NULL)
return NULL;
elements_names = built_elements_names;
}
else if (((elements_names == NULL) || (strcmp(elements_names, "") != 0)) && (nb_elements_per_line > 1))
else
{ // The number of elements names should be equal to the number of elements per line
char* token;
index_t n = 0;
token = strdup(elements_names);
token = strtok(token, ";");
while (token != NULL)
if (check_elt_names_count(elements_names, elt_names_formatted) != nb_elements_per_line)
{
token = strtok(NULL, ";");
n++;
}
if (n != nb_elements_per_line)
{
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line:"
"\n%lld elements per line\nelements names:%s\n", nb_elements_per_line, elements_names);
return NULL;
}
}
// TODO what if 1 element and name specified? doc
// Format the elements names string
if (! elt_names_formatted)
format_elements_names(elements_names, &elts_names_length);
else
elts_names_length = get_formatted_elt_names_length(elements_names);
// Calculate the size needed
header_size = obi_get_platform_header_size();
@ -816,11 +1040,11 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
header->version = version_number;
header->cloned_from = -1;
obi_column_set_elements_names(new_column, elements_names);
set_elements_names(new_column, elements_names, elts_names_length);
// Free the element names if they were built
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0))
free(elements_names);
if (built_elements_names != NULL)
free(built_elements_names);
strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME);
@ -886,6 +1110,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
}
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
const char* column_name,
obiversion_t version_number)
@ -1043,6 +1268,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
}
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
OBIDMS_column_p line_selection,
const char* column_name,
@ -1083,7 +1309,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
(column_to_clone->header)->indexer_name,
((column_to_clone->header)->associated_column).column_name,
((column_to_clone->header)->associated_column).version,
(column_to_clone->header)->comments
(column_to_clone->header)->comments,
true
);
if (new_column == NULL)
@ -1097,6 +1324,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
return NULL;
}
(new_column->header)->cloned_from = (column_to_clone->header)->version;
if (clone_data && (line_selection == NULL))
@ -1137,6 +1366,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
}
int obi_close_column(OBIDMS_column_p column)
{
int ret_val = 0;
@ -1185,6 +1415,7 @@ int obi_close_column(OBIDMS_column_p column)
}
int obi_clone_column_indexer(OBIDMS_column_p column)
{
char* new_indexer_name;
@ -1208,6 +1439,7 @@ int obi_clone_column_indexer(OBIDMS_column_p column)
}
int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap/remap?
{
size_t file_size;
@ -1309,6 +1541,7 @@ int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap
}
int obi_enlarge_column(OBIDMS_column_p column)
{
size_t file_size;
@ -1363,7 +1596,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
header_size = (column->header)->header_size;
file_size = header_size + new_data_size;
// Enlarge the file // TODO isn't it possible that this makes the file "move"?
// Enlarge the file
if (ftruncate(column_file_descriptor, file_size) < 0)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
@ -1414,6 +1647,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
}
void obi_ini_to_NA_values(OBIDMS_column_p column,
index_t first_line_nb,
index_t nb_lines)
@ -1479,6 +1713,7 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
}
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number)
{
OBIDMS_column_header_p header;
@ -1562,6 +1797,7 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
}
int obi_close_header(OBIDMS_column_header_p header)
{
if (munmap(header, header->header_size) < 0)
@ -1574,47 +1810,56 @@ int obi_close_header(OBIDMS_column_header_p header)
}
/*
 * Returns the index, within a line of the column, of the element named element_name.
 *
 * The lookup is delegated to bsearch_user_data(), which is given the header's
 * sorted_elements_idx array (nb_elements_per_line entries of type int), the column
 * header as user data, and cmp_elements_names_with_name_and_idx() as comparator.
 * The value pointed to by the returned entry is the element index.
 *
 * Returns OBIIdx_NA, after calling obi_set_errno(OBICOL_UNKNOWN_ERROR), when the
 * lookup finds no entry for that name.
 */
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
{
	int* elt_names_idx;

	elt_names_idx = bsearch_user_data(element_name,
	                                  (column->header)->sorted_elements_idx,
	                                  (column->header)->nb_elements_per_line,
	                                  sizeof(int),
	                                  column->header,
	                                  cmp_elements_names_with_name_and_idx);

	if (elt_names_idx != NULL)
		return (index_t)(*elt_names_idx);

	// No temporary copy of the names is made anymore, so there is nothing to free here
	obi_set_errno(OBICOL_UNKNOWN_ERROR);
	obidebug(1, "\nError: could not find element name %s", element_name);
	return OBIIdx_NA;
}
/*
 * Builds a newly allocated, '\0'-terminated string holding the elements names of a
 * column joined with ';' (no terminal ';').
 *
 * Each name is read from header->elements_names at the offset stored in
 * header->elements_names_idx[i], for i in [0, nb_elements_per_line).
 *
 * The returned pointer has to be freed by the caller.
 *
 * Returns NULL if the allocation fails or if the joined names do not fit in the buffer.
 * A column with no element yields an empty string.
 */
char* obi_get_elements_names(OBIDMS_column_p column)
{
	char*   elements_names;
	index_t i;
	int     elt_idx;
	size_t  len;
	size_t  j;
	size_t  capacity;

	capacity = ELEMENTS_NAMES_MAX + 1;

	elements_names = (char*) malloc(capacity * sizeof(char));
	if (elements_names == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for elements names");
		return NULL;
	}

	j = 0;
	for (i=0; i < (column->header)->nb_elements_per_line; i++)
	{
		elt_idx = ((column->header)->elements_names_idx)[i];
		len = strlen(((column->header)->elements_names)+elt_idx);

		// Each name needs room for itself plus one byte (its separator, or the final '\0')
		if ((j + len + 1) > capacity)
		{
			obi_set_errno(OBICOL_UNKNOWN_ERROR);
			obidebug(1, "\nError: the elements names are too long to be returned");
			free(elements_names);
			return NULL;
		}

		memcpy(elements_names+j, ((column->header)->elements_names)+elt_idx, len*sizeof(char));
		j = j + len;
		elements_names[j] = ';';
		j++;
	}

	// Overwrite the trailing separator with the terminator.
	// With no element at all, j is still 0 and must not be decremented.
	if (j > 0)
		j--;
	elements_names[j] = '\0';

	return elements_names;
}
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
{
// Check if the column is read-only
@ -1649,6 +1894,7 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
}
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
{
if ((line_nb+1) > ((column->header)->line_count))

View File

@ -28,17 +28,21 @@
#include "obiblob_indexer.h"
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
*/
#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
* are used ("0;1;2;...;n"), considering ELEMENTS_NAMES_MAX.
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO
*/
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
*/
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
*/
#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
* are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX.
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO
*/
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
*/
#define FORMATTED_ELT_NAMES_SEPARATOR '\0' /**< The separator between elements names once they are formatted
 * (the form stored in the column header).
 */
#define NOT_FORMATTED_ELT_NAMES_SEPARATOR ';' /**< The separator between elements names when they are not formatted
 * (the form accepted by obi_create_column() when elt_names_formatted is false).
 */
/**
@ -56,42 +60,48 @@ typedef struct Column_reference {
* @brief OBIDMS column header structure.
*/
typedef struct OBIDMS_column_header {
	size_t             header_size;                                     /**< Size of the header in bytes.
	                                                                     */
	size_t             data_size;                                       /**< Size of the data in bytes.
	                                                                     */
	index_t            line_count;                                      /**< Number of lines of data allocated.
	                                                                     */
	index_t            lines_used;                                      /**< Number of lines of data used.
	                                                                     */
	index_t            nb_elements_per_line;                            /**< Number of elements per line.
	                                                                     */
	char               elements_names[ELEMENTS_NAMES_MAX+1];            /**< Names of the line elements with '\0' as separator
	                                                                     *   and '\0\0' as terminal flag.
	                                                                     *   (default are the indices: "0\01\02\0...\0n\0\0").
	                                                                     */
	int                elements_names_length;                           /**< Length of the character array where the elements names are stored.
	                                                                     */
	int                elements_names_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the start of each element name in elements_names.
	                                                                     */
	int                sorted_elements_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the sorted element names in elements_names_idx.
	                                                                     */
	OBIType_t          returned_data_type;                              /**< Type of the data that is returned when getting an
	                                                                     *   element from the column.
	                                                                     */
	OBIType_t          stored_data_type;                                /**< Type of the data that is actually stored in the data
	                                                                     *   part of the column.
	                                                                     */
	time_t             creation_date;                                   /**< Date of creation of the file.
	                                                                     */
	obiversion_t       version;                                         /**< Version of the column.
	                                                                     */
	obiversion_t       cloned_from;                                     /**< Version of the column from which this column
	                                                                     *   was cloned (-1 if it was not created by cloning
	                                                                     *   another column).
	                                                                     */
	char               name[OBIDMS_COLUMN_MAX_NAME+1];                  /**< The column name as a NULL terminated string.
	                                                                     */
	char               indexer_name[INDEXER_MAX_NAME+1];                /**< If there is one, the indexer name as a NULL terminated string.
	                                                                     */
	Column_reference_t associated_column;                               /**< If there is one, the reference to the associated column.
	                                                                     */
	char               comments[COMMENTS_MAX_LENGTH+1];                 /**< Comments stored as a classical zero end C string.
	                                                                     */
} OBIDMS_column_header_t, *OBIDMS_column_header_p;
@ -184,12 +194,13 @@ size_t obi_get_platform_header_size();
* @param nb_lines The number of lines to be stored.
* @param nb_elements_per_line The number of elements per line. // TODO talk about default values
* @param elements_names The names of the elements with ';' as separator (no terminal ';'),
* NULL or "" if the default names are to be used ("0;1;2;...;n").
* NULL or "" if the default names are to be used ("0\01\02\0...\0n").
* @param indexer_name The name of the indexer if there is one associated with the column.
* If NULL or "", the indexer name is set as the column name.
* @param associated_column_name The name of the associated column if there is one.
* @param associated_column_version The version of the associated column if there is one.
* @param comments Optional comments associated with the column.
* @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()).
*
* @returns A pointer on the newly created column structure.
* @retval NULL if an error occurred.
@ -206,7 +217,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const char* indexer_name,
const char* associated_column_name,
obiversion_t associated_column_version,
const char* comments
const char* comments,
bool elt_names_formatted
);
@ -353,7 +365,7 @@ int obi_close_header(OBIDMS_column_header_p header);
* @param element_name The name of the element.
*
* @returns The index of the element in a line of the column.
* @retval OBIIdx_NA if an error occurred. // TODO not sure if this is "clean".
* @retval OBIIdx_NA if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -361,6 +373,22 @@ int obi_close_header(OBIDMS_column_header_p header);
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
/**
* @brief Recovers the elements names of the lines of a column, with ';' as separator (i.e. "0;1;2;...;n\0").
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on an OBIDMS column.
*
* @returns A pointer on a character array where the elements names are stored.
* @retval NULL if an error occurred.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_get_elements_names(OBIDMS_column_p column);
/**
* @brief Prepares a column to set a value.
*

View File

@ -47,7 +47,7 @@
/**
* Internal function building the file name where the informations about an obiview are stored.
* Internal function building the file name where the informations about a finished, read-only obiview are stored.
*
* @warning The returned pointer has to be freed by the caller.
*
@ -63,7 +63,23 @@ static char* build_obiview_file_name(const char* view_name);
/**
* Internal function checking if a view with a given name already exists in a DMS.
* Internal function building the file name where the informations about an unfinished, writable obiview are stored.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param view_name The name of the view.
*
* @returns A pointer to the file name.
* @retval NULL if an error occurred.
*
* @since February 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static char* build_unfinished_obiview_file_name(const char* view_name);
/**
* Internal function checking if a view (either finished or unfinished) with a given name already exists in a DMS.
*
* @param dms The DMS.
* @param view_name The name of the view.
@ -73,7 +89,7 @@ static char* build_obiview_file_name(const char* view_name);
* @since September 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
bool view_exists(OBIDMS_p dms, const char* view_name);
static bool view_exists(OBIDMS_p dms, const char* view_name);
/**
@ -84,7 +100,7 @@ bool view_exists(OBIDMS_p dms, const char* view_name);
* @since June 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t get_platform_view_file_size();
static size_t get_platform_view_file_size();
/**
@ -99,7 +115,7 @@ size_t get_platform_view_file_size();
* @since August 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int enlarge_view_file(Obiview_p view, size_t new_size);
static int enlarge_view_file(Obiview_p view, size_t new_size);
/**
@ -117,7 +133,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size);
* @since August 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int write_comments_to_view_file(Obiview_p view, const char* comments);
static int write_comments_to_view_file(Obiview_p view, const char* comments);
/**
@ -134,7 +150,7 @@ int write_comments_to_view_file(Obiview_p view, const char* comments);
* @since June 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int create_obiview_file(OBIDMS_p dms, const char* view_name);
static int create_obiview_file(OBIDMS_p dms, const char* view_name);
/**
@ -156,7 +172,7 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name);
* @since June 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void update_column_refs(Obiview_p view);
static void update_column_refs(Obiview_p view);
/**
@ -175,7 +191,7 @@ void update_column_refs(Obiview_p view);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int create_column_dict(Obiview_p view);
static int create_column_dict(Obiview_p view);
/**
@ -194,7 +210,7 @@ int create_column_dict(Obiview_p view);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int update_column_dict(Obiview_p view);
static int update_column_dict(Obiview_p view);
/**
@ -219,7 +235,7 @@ int update_column_dict(Obiview_p view);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int update_column_refs_and_dict(Obiview_p view);
static int update_column_refs_and_dict(Obiview_p view);
/**
@ -239,7 +255,7 @@ int update_column_refs_and_dict(Obiview_p view);
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int update_lines(Obiview_p view, index_t line_count);
static int update_lines(Obiview_p view, index_t line_count);
/**
@ -257,7 +273,71 @@ int update_lines(Obiview_p view, index_t line_count);
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
/**
* @brief Saves a view, updating its informations in the view file.
*
* @warning The view must be writable.
*
* @param view A pointer on the view.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int save_view(Obiview_p view);
/**
* @brief Rename a view file once the view is finished, replacing the '*.obiview_unfinished' extension with '*.obiview'.
*
* @param view A pointer on the view.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since February 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int rename_finished_view(Obiview_p view);
/**
* @brief Finishes a view: check the predicates, save all the informations, rename the view file.
*
* @param view A pointer on the view.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since February 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int finish_view(Obiview_p view);
/**
* @brief Closes an opened view.
*
* @warning Doesn't save the view.
*
* @param view A pointer on the view.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @see obi_save_and_close_view()
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int close_view(Obiview_p view);
/**
@ -276,7 +356,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p);
static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p);
/**
@ -294,7 +374,7 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
static int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
/****** PREDICATE FUNCTIONS *******/
@ -313,7 +393,7 @@ int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* view_has_nuc_sequence_column(Obiview_p view);
static char* view_has_nuc_sequence_column(Obiview_p view);
/**
@ -330,7 +410,7 @@ char* view_has_nuc_sequence_column(Obiview_p view);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* view_has_quality_column(Obiview_p view);
static char* view_has_quality_column(Obiview_p view);
/**
@ -347,7 +427,7 @@ char* view_has_quality_column(Obiview_p view);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* view_has_id_column(Obiview_p view);
static char* view_has_id_column(Obiview_p view);
/**
@ -364,7 +444,7 @@ char* view_has_id_column(Obiview_p view);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* view_has_definition_column(Obiview_p view);
static char* view_has_definition_column(Obiview_p view);
/**
@ -381,7 +461,7 @@ char* view_has_definition_column(Obiview_p view);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* view_check_qual_match_seqs(Obiview_p view);
static char* view_check_qual_match_seqs(Obiview_p view);
/**
@ -396,7 +476,7 @@ char* view_check_qual_match_seqs(Obiview_p view);
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view));
static char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view));
/**
@ -410,7 +490,7 @@ char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obivi
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* view_check_all_predicates(Obiview_p view);
static char* view_check_all_predicates(Obiview_p view);
/************************************************************************
@ -443,11 +523,35 @@ static char* build_obiview_file_name(const char* view_name)
}
bool view_exists(OBIDMS_p dms, const char* view_name)
/*
 * Builds the file name "<view_name>.obiview_unfinished" in a newly allocated buffer.
 *
 * The returned pointer has to be freed by the caller.
 * Returns NULL if the allocation or the formatting fails.
 */
static char* build_unfinished_obiview_file_name(const char* view_name)
{
	static const char suffix[] = ".obiview_unfinished";
	char*             file_name;

	// sizeof(suffix) counts the terminating '\0', so no extra byte is needed
	file_name = (char*) malloc((strlen(view_name) + sizeof(suffix))*sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a view file name");
		return NULL;
	}

	if (sprintf(file_name, "%s%s", view_name, suffix) < 0)
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nProblem building an unfinished obiview file name");
		free(file_name);	// The buffer is not handed to the caller on failure
		return NULL;
	}

	return file_name;
}
static bool view_exists(OBIDMS_p dms, const char* view_name)
{
struct dirent* dp;
char* file_name;
// Check finished views
// Create file name
file_name = build_obiview_file_name(view_name);
if (file_name == NULL)
@ -458,13 +562,38 @@ bool view_exists(OBIDMS_p dms, const char* view_name)
if ((dp->d_name)[0] == '.')
continue;
if (strcmp(dp->d_name, file_name) == 0)
{
free(file_name);
return true;
}
}
free(file_name);
// Check unfinished views
// Create file name
file_name = build_unfinished_obiview_file_name(view_name);
if (file_name == NULL)
return -1;
while ((dp = readdir(dms->view_directory)) != NULL)
{
if ((dp->d_name)[0] == '.')
continue;
if (strcmp(dp->d_name, file_name) == 0)
{
free(file_name);
return true;
}
}
free(file_name);
return false;
}
size_t get_platform_view_file_size()
static size_t get_platform_view_file_size()
{
size_t obiview_size;
size_t rounded_obiview_size;
@ -480,7 +609,7 @@ size_t get_platform_view_file_size()
}
int enlarge_view_file(Obiview_p view, size_t new_size)
static int enlarge_view_file(Obiview_p view, size_t new_size)
{
int obiview_file_descriptor;
double multiple;
@ -488,7 +617,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size)
char* file_name;
// Create file name
file_name = build_obiview_file_name((view->infos)->name);
file_name = build_unfinished_obiview_file_name((view->infos)->name);
if (file_name == NULL)
return -1;
@ -556,7 +685,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size)
}
int write_comments_to_view_file(Obiview_p view, const char* comments)
static int write_comments_to_view_file(Obiview_p view, const char* comments)
{
size_t new_size;
@ -580,14 +709,14 @@ int write_comments_to_view_file(Obiview_p view, const char* comments)
}
int create_obiview_file(OBIDMS_p dms, const char* view_name)
static int create_obiview_file(OBIDMS_p dms, const char* view_name)
{
char* file_name;
int obiview_file_descriptor;
size_t file_size;
// Create file name
file_name = build_obiview_file_name(view_name);
file_name = build_unfinished_obiview_file_name(view_name);
if (file_name == NULL)
return -1;
@ -634,7 +763,7 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name)
}
void update_column_refs(Obiview_p view)
static void update_column_refs(Obiview_p view)
{
int i;
@ -646,7 +775,7 @@ void update_column_refs(Obiview_p view)
}
int create_column_dict(Obiview_p view)
static int create_column_dict(Obiview_p view)
{
int i;
@ -681,7 +810,7 @@ int create_column_dict(Obiview_p view)
}
int update_column_dict(Obiview_p view)
static int update_column_dict(Obiview_p view)
{
// Re-initialize the dictionary to rebuild it from scratch
ht_free(view->column_dict);
@ -693,14 +822,14 @@ int update_column_dict(Obiview_p view)
}
int update_column_refs_and_dict(Obiview_p view)
/*
 * Calls update_column_refs() on the view, then update_column_dict(),
 * and returns the value returned by update_column_dict().
 */
static int update_column_refs_and_dict(Obiview_p view)
{
	int dict_status;

	update_column_refs(view);
	dict_status = update_column_dict(view);

	return dict_status;
}
int update_lines(Obiview_p view, index_t line_count)
static int update_lines(Obiview_p view, index_t line_count)
{
int i;
@ -739,7 +868,7 @@ int update_lines(Obiview_p view, index_t line_count)
}
OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
{
int i;
OBIDMS_column_p column = NULL;
@ -799,7 +928,161 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
}
int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
/*
 * Records in the view infos the reference to the current line selection (or the
 * absence of one), then calls update_column_refs().
 *
 * Returns 0 on success, -1 if the view is read-only.
 */
static int save_view(Obiview_p view)
{
	Obiview_infos_p infos;
	OBIDMS_column_p selection;

	// A read-only view can not be saved
	if (view->read_only)
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError trying to save a read-only view");
		return -1;
	}

	infos = view->infos;
	selection = view->line_selection;

	if (selection == NULL)
	{
		// Necessary because the line selection could have been deleted if a column was cloned
		(infos->line_selection).column_name[0] = '\0';
		(infos->line_selection).version = -1;
		infos->all_lines = true;
	}
	else
	{
		// Unnecessary in theory, the line selection references are already saved
		strcpy((infos->line_selection).column_name, (selection->header)->name);
		(infos->line_selection).version = (selection->header)->version;
		infos->all_lines = false;
	}

	update_column_refs(view);

	return 0;
}
/*
 * Renames the file of a view from its unfinished name to its finished name.
 *
 * Both names are built from (view->infos)->name, prefixed with "VIEWS/", and resolved
 * with obi_dms_get_full_path() before calling rename().
 *
 * Returns 0 on success, -1 on any failure. Every temporary buffer is released on all paths.
 */
static int rename_finished_view(Obiview_p view)
{
	char* old_name = NULL;
	char* new_name = NULL;
	char* path_old_name = NULL;
	char* path_new_name = NULL;
	char* full_path_old_name = NULL;
	char* full_path_new_name = NULL;
	int   written;
	int   ret_value = -1;

	old_name = build_unfinished_obiview_file_name((view->infos)->name);
	new_name = build_obiview_file_name((view->infos)->name);
	if ((old_name == NULL) || (new_name == NULL))
		goto cleanup;

	path_old_name = malloc(MAX_PATH_LEN);
	path_new_name = malloc(MAX_PATH_LEN);
	if ((path_old_name == NULL) || (path_new_name == NULL))
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for the path of a view file");
		goto cleanup;
	}

	// snprintf never writes past MAX_PATH_LEN; a result >= MAX_PATH_LEN means truncation
	written = snprintf(path_old_name, MAX_PATH_LEN, "VIEWS/%s", old_name);
	if ((written < 0) || (written >= MAX_PATH_LEN))
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError building the path of an unfinished view file");
		goto cleanup;
	}
	written = snprintf(path_new_name, MAX_PATH_LEN, "VIEWS/%s", new_name);
	if ((written < 0) || (written >= MAX_PATH_LEN))
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError building the path of a finished view file");
		goto cleanup;
	}

	full_path_old_name = obi_dms_get_full_path(view->dms, path_old_name);
	full_path_new_name = obi_dms_get_full_path(view->dms, path_new_name);
	if ((full_path_old_name == NULL) || (full_path_new_name == NULL))
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError getting the full path of a view file");
		goto cleanup;
	}

	if (rename(full_path_old_name, full_path_new_name) < 0)
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError renaming the file of a finished view: %s", full_path_new_name);
		goto cleanup;
	}

	ret_value = 0;

cleanup:
	// free(NULL) is a no-op, so buffers that were never allocated are safe to pass
	free(old_name);
	free(new_name);
	free(path_new_name);
	free(path_old_name);
	free(full_path_old_name);
	free(full_path_new_name);

	return ret_value;
}
/*
 * Finishes a view: checks the predicates, writes them to the comments of the view
 * file, saves the view with save_view(), renames its file with rename_finished_view(),
 * and sets the finished flag in the view infos.
 *
 * Returns 0 on success, -1 if the view is read-only or if any step fails.
 */
static int finish_view(Obiview_p view)
{
	char* predicates;
	int   comments_status;

	// Check that the view is not read-only
	if (view->read_only)
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError trying to finish a read-only view");
		return -1;
	}

	// Check predicates
	predicates = view_check_all_predicates(view);
	if (predicates == NULL)
	{
		obidebug(1, "\nView predicates not respected");
		return -1;	// TODO reverse view (delete files)
	}

	// The predicates string is only needed for the write, so release it before checking the result
	comments_status = write_comments_to_view_file(view, predicates);
	free(predicates);
	if (comments_status < 0)
	{
		obidebug(1, "\nError writing the predicates in the comments of a view");
		return -1;
	}

	if (save_view(view) < 0)
		return -1;

	if (rename_finished_view(view) < 0)
		return -1;

	// Flag the view as finished
	(view->infos)->finished = true;

	return 0;
}
/*
 * Closes every column of a view and its line selection if there is one, frees the
 * column dictionary, unmaps the view file and frees the view structure.
 * The view is not saved.
 *
 * Every step is attempted even if an earlier one failed.
 * Returns 0 if all steps succeeded, -1 if at least one of them failed.
 */
static int close_view(Obiview_p view)
{
	int status = 0;
	int col = 0;

	// Close the columns
	while (col < ((view->infos)->column_count))
	{
		if (obi_close_column((view->columns)[col]) < 0)
		{
			obidebug(1, "\nError closing a column while closing a view");
			status = -1;
		}
		col++;
	}

	// Close the line selection, only attempted when there is one
	if ((view->line_selection != NULL) && (obi_close_column(view->line_selection) < 0))
	{
		obidebug(1, "\nError closing a line selection while closing a view");
		status = -1;
	}

	// Release the column dictionary
	ht_free(view->column_dict);

	// Unmap the view file
	if (obi_view_unmap_file(view->dms, view->infos) < 0)
	{
		obidebug(1, "\nError unmaping a view file while closing a view");
		status = -1;
	}

	free(view);

	return status;
}
static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
{
int i;
char* column_name = NULL;
@ -846,7 +1129,7 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
}
int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
static int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
{
if (((*line_nb_p)+1) > ((view->infos)->line_count))
{
@ -865,7 +1148,7 @@ int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
/****** PREDICATE FUNCTIONS *******/
char* view_has_nuc_sequence_column(Obiview_p view)
static char* view_has_nuc_sequence_column(Obiview_p view)
{
char* predicate;
@ -889,7 +1172,7 @@ char* view_has_nuc_sequence_column(Obiview_p view)
}
char* view_has_quality_column(Obiview_p view)
static char* view_has_quality_column(Obiview_p view)
{
char* predicate;
@ -913,7 +1196,7 @@ char* view_has_quality_column(Obiview_p view)
}
char* view_has_id_column(Obiview_p view)
static char* view_has_id_column(Obiview_p view)
{
char* predicate;
@ -936,7 +1219,8 @@ char* view_has_id_column(Obiview_p view)
}
}
char* view_has_definition_column(Obiview_p view)
static char* view_has_definition_column(Obiview_p view)
{
char* predicate;
@ -960,7 +1244,7 @@ char* view_has_definition_column(Obiview_p view)
}
char* view_check_qual_match_seqs(Obiview_p view)
static char* view_check_qual_match_seqs(Obiview_p view)
{
index_t i, j, k;
index_t nb_elements_per_line;
@ -1053,13 +1337,13 @@ char* view_check_qual_match_seqs(Obiview_p view)
}
char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view))
/*
 * Calls predicate_function on the view and returns whatever it returns.
 */
static char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view))
{
	char* predicate_result;

	predicate_result = (*predicate_function)(view);

	return predicate_result;
}
char* view_check_all_predicates(Obiview_p view)
static char* view_check_all_predicates(Obiview_p view)
{
int i, j;
size_t size_to_allocate;
@ -1195,7 +1479,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
}
// Map view file
view->infos = obi_view_map_file(dms, view_name);
view->infos = obi_view_map_file(dms, view_name, false);
if (view->infos == NULL)
{
obidebug(1, "\nError mapping the informations of a new view");
@ -1236,7 +1520,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
// If there is a new line selection, build it by combining it with the one from the view to clone if there is one
else if (line_selection != NULL)
{
view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL);
view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL, false);
if ((view->line_selection) == NULL)
{
obidebug(1, "\nError creating a column corresponding to a line selection");
@ -1305,7 +1589,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
if (write_comments_to_view_file(view, clone_comment) < 0)
{
obidebug(1, "\nError writing comments when creating a view");
obi_close_view(view);
close_view(view);
return NULL;
}
}
@ -1341,7 +1625,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
if (write_comments_to_view_file(view, comments) < 0)
{
obidebug(1, "\nError writing comments when creating a view");
obi_close_view(view);
close_view(view);
return NULL;
}
@ -1360,7 +1644,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
// Create the column dictionary (hash table) associating column names (or aliases) to column pointers
if (create_column_dict(view) < 0)
{
obi_close_view(view);
close_view(view);
return NULL;
}
@ -1409,7 +1693,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
return NULL;
view = obi_new_view(dms, view_name, view_to_clone, line_selection, comments);
obi_close_view(view_to_clone);
close_view(view_to_clone);
return view;
}
@ -1511,26 +1795,43 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
return NULL;
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column);
obi_close_view(view_to_clone);
close_view(view_to_clone);
return view;
}
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bool finished)
{
char* file_name;
Obiview_infos_p view_infos;
int obiview_file_descriptor;
size_t file_size;
int open_flag;
int mmap_flag;
// Create file name
file_name = build_obiview_file_name(view_name);
if (finished)
file_name = build_obiview_file_name(view_name);
else
file_name = build_unfinished_obiview_file_name(view_name);
if (file_name == NULL)
return NULL;
// Set flags (read-only or not)
if (finished)
{
open_flag = O_RDONLY;
mmap_flag = PROT_READ;
}
else
{
open_flag = O_RDWR;
mmap_flag = PROT_READ | PROT_WRITE;
}
// Open view file
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDWR, 0777);
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, open_flag, 0777);
if (obiview_file_descriptor < 0)
{
if (errno == ENOENT)
@ -1560,7 +1861,7 @@ Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
// Map the view infos structure
view_infos = mmap(NULL,
file_size,
PROT_READ | PROT_WRITE,
mmap_flag,
MAP_SHARED,
obiview_file_descriptor,
0
@ -1585,17 +1886,20 @@ Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
{
char* file_name;
int obiview_file_descriptor;
size_t file_size;
char* file_name;
int obiview_file_descriptor;
size_t file_size;
// Get file name
file_name = build_obiview_file_name(view_infos->name);
if (view_infos->finished)
file_name = build_obiview_file_name(view_infos->name);
else
file_name = build_unfinished_obiview_file_name(view_infos->name);
if (file_name == NULL)
return -1;
// Open view file
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDWR, 0777);
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDONLY, 0777);
if (obiview_file_descriptor < 0)
{
obi_set_errno(OBIVIEW_ERROR);
@ -1661,13 +1965,9 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
}
// Map view file
view->infos = obi_view_map_file(dms, view_name);
// Check that the view is finished and can be opened
if ((view->infos)->finished == false)
view->infos = obi_view_map_file(dms, view_name, true);
if ((view->infos) == NULL)
{
obidebug(1, "\nError opening a view: the view is not finished");
obi_view_unmap_file(view->dms, view->infos);
free(view);
return NULL;
}
@ -1697,7 +1997,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
if (column_pointer == NULL)
{
obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, column_name, column_version);
obi_close_view(view);
close_view(view);
return NULL;
}
(view->columns)[i] = column_pointer;
@ -1713,7 +2013,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
if (create_column_dict(view) < 0)
{
obidebug(1, "\nError creating the column dictionary when opening a view");
obi_close_view(view);
close_view(view);
return NULL;
}
@ -1792,7 +2092,7 @@ int obi_view_add_column(Obiview_p view,
// Open or create the column
if (create)
{ // Create column
column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, indexer_name, associated_column_name, associated_column_version, comments);
column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, indexer_name, associated_column_name, associated_column_version, comments, false);
if (column == NULL)
{
obidebug(1, "\nError creating a column to add to a view");
@ -1963,103 +2263,16 @@ int obi_view_create_column_alias(Obiview_p view, const char* current_name, const
}
/*
 * Records the current state of a writable view in its mapped infos structure:
 * the reference to its line selection (or the absence of one) and the
 * references to its columns.
 *
 * Returns 0 on success, -1 (with obi_errno set to OBIVIEW_ERROR) if the view
 * is read-only.
 */
int obi_save_view(Obiview_p view)
{
    Obiview_infos_p infos;

    // A read-only view can not be saved
    if (view->read_only)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError trying to save a read-only view");
        return -1;
    }

    infos = view->infos;

    if (view->line_selection == NULL)
    {
        // No line selection: reset the stored reference.
        // Necessary because the line selection could have been deleted if a column was cloned
        (infos->line_selection).column_name[0] = '\0';
        (infos->line_selection).version = -1;
        infos->all_lines = true;
    }
    else
    {
        // Store the name and version of the line selection column.
        // Unnecessary in theory, the line selection references are already saved
        strcpy((infos->line_selection).column_name, ((view->line_selection)->header)->name);
        (infos->line_selection).version = ((view->line_selection)->header)->version;
        infos->all_lines = false;
    }

    // Refresh the column references kept in the view infos
    update_column_refs(view);

    return 0;
}
/*
 * Closes an opened view and releases the resources attached to it.
 *
 * The function closes every column referenced by the view, closes the line
 * selection column if there is one, flags the view as finished, frees the
 * column dictionary, unmaps the view file and finally frees the view
 * structure itself.
 *
 * Errors do not interrupt the teardown: each failing step is reported with
 * obidebug() and remembered, and the remaining steps are still executed.
 *
 * @param view A pointer on the view to close. It must not be used after the call.
 *
 * @returns 0 if every step succeeded, -1 if at least one step failed.
 */
int obi_close_view(Obiview_p view)
{
int i;
int ret_value;
// Stays at 0 unless one of the steps below fails
ret_value = 0;
// Close all the columns of the view
for (i=0; i < ((view->infos)->column_count); i++)
{
if (obi_close_column((view->columns)[i]) < 0)
{
obidebug(1, "\nError closing a column while closing a view");
ret_value = -1;
}
}
// Close line selection if there is one
if (view->line_selection != NULL)
{
if (obi_close_column(view->line_selection) < 0)
{
obidebug(1, "\nError closing a line selection while closing a view");
ret_value = -1;
}
}
// Flag the view as finished
// NOTE(review): this write is unconditional (done even if a column failed to close, and whatever the read_only status); it must happen before the infos structure is unmapped below
(view->infos)->finished = true;
// Free the column dictionary
ht_free(view->column_dict);
// Unmap view file
if (obi_view_unmap_file(view->dms, view->infos) < 0)
{
obidebug(1, "\nError unmaping a view file while closing a view");
ret_value = -1;
}
// The view structure itself is released last, once nothing reads it anymore
free(view);
return ret_value;
}
int obi_save_and_close_view(Obiview_p view)
{
char* predicates;
if (!(view->read_only))
{
predicates = view_check_all_predicates(view);
if (predicates == NULL)
{
obidebug(1, "\nView predicates not respected");
return -1; // TODO reverse view (delete files)
}
else
{
write_comments_to_view_file(view, predicates);
free(predicates);
}
if (obi_save_view(view) < 0)
// Finish and save the view if it is not read-only
if ( ! (view->read_only))
if (finish_view(view) < 0)
return -1;
}
if (obi_close_view(view) < 0)
if (close_view(view) < 0)
return -1;
return 0;
}

View File

@ -242,6 +242,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
*
* @param dms A pointer on the OBIDMS.
* @param view_name The unique name identifying the view.
* @param finished Whether the view is finished or not.
*
* @returns A pointer on the mapped view infos structure.
* @retval NULL if an error occurred.
@ -249,7 +250,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
* @since June 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name);
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bool finished);
/**
@ -444,42 +445,6 @@ int obi_select_line(Obiview_p view, index_t line_nb);
int obi_select_lines(Obiview_p view, index_t* line_nbs);
/**
* @brief Saves a view, writing it in the view file.
*
* The view is written at the end of the view file, following the latest written view.
*
* @warning The view must be writable.
*
* @param view A pointer on the view.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_save_view(Obiview_p view);
/**
* @brief Closes an opened view.
*
* @warning Uses obi_save_and_close_view() to automatically save the view if it's not already saved in the view file.
*
* @param view A pointer on the view.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_view(Obiview_p view);
/**
* @brief Closes an opened view, and saves it if it is not read-only (meaning it is not already saved in the view file).
*

View File

@ -1,16 +1,22 @@
/*
* sse_banded_LCS_alignment.c
*
* Created on: 7 nov. 2012
* Author: celine mercier
/****************************************************************************
* LCS alignment of two sequences *
****************************************************************************/
/**
* @file sse_banded_LCS_alignment.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date November 7th 2012
* @brief Functions handling the alignment of two sequences to compute their Longest Common Subsequence.
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdint.h>
#include <stdbool.h>
#include <limits.h>
#include "obierrno.h"
#include "obidebug.h"
@ -24,6 +30,231 @@
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**************************************************************************
*
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
*
**************************************************************************/
/**
* @brief Internal function printing a 128 bits register as 8 16-bits integers.
*
* @param r The register to print.
*
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static void printreg(__m128i r);
/**
* @brief Internal function extracting a 16-bits integer from a 128 bits register.
*
* @param r The register to read.
* @param p The position at which the integer should be read (between 0 and 7).
*
* @returns The extracted integer.
*
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static inline int extract_reg(__m128i r, int p);
/**
* @brief Internal function aligning two sequences, computing the lengths of their Longest Common Subsequence and of their alignment.
*
* @warning The first argument (seq1) must correspond to the longest sequence.
*
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
* @param l1 The length of the first sequence.
* @param l2 The length of the second sequence.
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
* @param address A pointer, aligned on a 16-byte boundary, on the int array where the initial values for the alignment length are stored,
* as prepared for the alignment by initializeAddressWithGaps().
* @param lcs_length A pointer on the int where the LCS length will be stored.
* @param ali_length A pointer on the int where the alignment length will be stored.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length);
/**
* @brief Internal function aligning two sequences, computing the length of their Longest Common Subsequence (and not the alignment length).
*
* @warning The first argument (seq1) must correspond to the longest sequence.
*
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
* @param l1 The length of the first sequence.
* @param l2 The length of the second sequence.
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
* @param lcs_length A pointer on the int where the LCS length will be stored.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length);
/**
* @brief Internal function calculating the length of the left band for the banded alignment.
*
* @param lmax The length of the longest sequence to align.
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
*
* @returns The length of the left band.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int calculateLeftBandLength(int lmax, int LCSmin);
/**
* @brief Internal function calculating the length of the right band for the banded alignment.
*
* @param lmin The length of the shortest sequence to align.
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
*
* @returns The length of the right band.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int calculateRightBandLength(int lmin, int LCSmin);
/**
* @brief Internal function calculating the length of the complete band for the banded alignment.
*
* @param bandLengthRight The length of the right band for the banded alignment, as computed by calculateRightBandLength().
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
*
* @returns The length of the complete band.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft);
/**
* @brief Internal function calculating the size to allocate for the int array where the alignment length will be stored in the matrix.
*
* @param maxLen The length of the longest sequence to align.
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
*
* @returns The size to allocate in bytes.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int calculateSizeToAllocate(int maxLen, int LCSmin);
/**
* @brief Internal function initializing the int array corresponding to a sequence to align with default values.
*
* @param seq The int array corresponding to the sequence to align, as prepared by putSeqInSeq() or putBlobInSeq().
* @param size The number of positions to initialize.
* @param iniValue The value that the positions should be initialized to.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void iniSeq(int16_t* seq, int size, int16_t iniValue);
/**
* @brief Internal function building the int array corresponding to a sequence to align.
*
* Each nucleotide is stored as a short int (int16_t).
*
* @param seq A pointer on the allocated int array.
* @param s A pointer on the character string corresponding to the sequence.
* @param l The length of the sequence.
* @param reverse A boolean indicating whether the sequence should be written reversed
* (for the second sequence to align).
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void putSeqInSeq(int16_t* seq, char* s, int l, bool reverse);
/**
* @brief Internal function building the int array corresponding to an obiblob containing a sequence.
*
* Each nucleotide is stored as a short int (int16_t).
*
* @param seq A pointer on the allocated int array.
* @param b A pointer on the obiblob containing the sequence.
* @param l The length of the (decoded) sequence.
* @param reverse A boolean indicating whether the sequence should be written reversed
* (for the second sequence to align).
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void putBlobInSeq(int16_t* seq, Obi_blob_p b, int l, bool reverse);
/**
* @brief Internal function preparing an int array with the initial values for the alignment lengths before the alignment.
*
* The int array containing the initial alignment lengths (corresponding to the first line of the diagonalized band of the alignment matrix)
* needs to be initialized with external gap lengths before the alignment.
*
* @param address A pointer, aligned on a 16-byte boundary, on the int array where the initial values for the alignment length are to be stored.
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
* @param lmax The length of the longest sequence to align.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int lmax);
/**
* @brief Internal function aligning two sequences, computing the lengths of their Longest Common Subsequence and of their alignment.
*
* @warning The first argument (seq1) must correspond to the longest sequence.
*
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
* @param l1 The length of the first sequence.
* @param l2 The length of the second sequence.
* @param normalize Whether the score should be normalized with the reference sequence length.
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
* @param address A pointer, aligned on a 16-byte boundary, on an allocated int array where the initial values for the alignment length will be stored.
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
* @param lcs_length A pointer on the int where the LCS length will be stored.
* @param ali_length A pointer on the int where the alignment length will be stored.
*
* @returns The alignment score (normalized according to the parameters).
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length);
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
*
************************************************************************/
static void printreg(__m128i r)
{
int16_t a0,a1,a2,a3,a4,a5,a6,a7;
@ -61,7 +292,6 @@ static inline int extract_reg(__m128i r, int p)
}
// TODO warning on length order
void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length)
{
register int j;
@ -287,7 +517,6 @@ void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int
}
// TODO warning on length order
void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length)
{
register int j;
@ -319,7 +548,7 @@ void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int
// Initialisations
odd_BLL = bandLengthLeft & 1;
even_BLL = !odd_BLL;
even_BLL = !odd_BLL;
numberOfRegistersPerLine = bandLengthTotal / 8;
numberOfRegistersFor3Lines = 3 * numberOfRegistersPerLine;
@ -446,15 +675,14 @@ int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft)
}
// TODO that's gonna be fun to doc
int calculateSizeToAllocate(int maxLen, int minLen, int LCSmin)
int calculateSizeToAllocate(int maxLen, int LCSmin)
{
int size;
size = calculateLeftBandLength(maxLen, LCSmin);
size *= 2;
size = (size & (~ (int)7)) + (( size & (int)7) ? 8:0); // Closest greater 8 multiple
size = (size & (~ (int)7)) + ((size & (int)7) ? 8:0); // Closest greater 8 multiple
size *= 3;
size += 16;
@ -522,13 +750,13 @@ void putBlobInSeq(int16_t* seq, Obi_blob_p b, int l, bool reverse)
}
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int l1)
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int lmax)
{
int i;
int address_00, x_address_10, address_01, address_01_shifted;
int numberOfRegistersPerLine;
int bm;
int value=INT16_MAX-l1;
int value=INT16_MAX-lmax;
numberOfRegistersPerLine = bandLengthTotal / 8;
bm = bandLengthLeft%2;
@ -556,7 +784,6 @@ void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLe
}
// TODO warning on length order
double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length)
{
double id;
@ -610,10 +837,14 @@ double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool n
// PUBLIC FUNCTIONS
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
*
**********************************************************************/
int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool similarity_mode)
int calculateLCSmin(int lmax, int lmin, double threshold, bool normalize, int reference, bool similarity_mode)
{
int LCSmin;
@ -622,16 +853,16 @@ int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int refere
if (normalize)
{
if (reference == MINLEN)
LCSmin = threshold*l2;
LCSmin = threshold*lmin;
else // ref = maxlen or alilen
LCSmin = threshold*l1;
LCSmin = threshold*lmax;
}
else if (similarity_mode)
LCSmin = threshold;
else if (reference == MINLEN) // not similarity_mode
LCSmin = l2 - threshold;
LCSmin = lmin - threshold;
else // not similarity_mode and ref = maxlen or alilen
LCSmin = l1 - threshold;
LCSmin = lmax - threshold;
}
else
LCSmin = 0;
@ -669,6 +900,14 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
lmin = l1;
}
// Check that the sequences are not greater than what can be aligned using the 16 bits registers (as the LCS and alignment lengths are kept on 16 bits)
if (lmax > SHRT_MAX)
{
obi_set_errno(OBI_ALIGN_ERROR);
obidebug(1, "\nError: can not align sequences longer than %d (as the LCS and alignment lengths are kept on 16 bits)", SHRT_MAX);
return 0; // TODO DOUBLE_MIN to flag error
}
// If the score is expressed as a normalized distance, get the corresponding identity
if (!similarity_mode && normalize)
threshold = 1.0 - threshold;
@ -679,7 +918,7 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
// Allocate space for matrix band if the alignment length must be computed
if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
{
sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
sizeToAllocateForBand = calculateSizeToAllocate(lmax, LCSmin);
address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
if (address == NULL)
{
@ -764,6 +1003,14 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
lmin = l1;
}
// Check that the sequences are not greater than what can be aligned using the 16 bits registers (as the LCS and alignment lengths are kept on 16 bits)
if (lmax > SHRT_MAX)
{
obi_set_errno(OBI_ALIGN_ERROR);
obidebug(1, "\nError: can not align sequences longer than %d (as the LCS and alignment lengths are kept on 16 bits)", SHRT_MAX);
return 0; // TODO DOUBLE_MIN to flag error
}
// If the score is expressed as a normalized distance, get the corresponding identity
if (!similarity_mode && normalize)
threshold = 1.0 - threshold;
@ -774,13 +1021,13 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
// Allocate space for matrix band if the alignment length must be computed
if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
{
sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
sizeToAllocateForBand = calculateSizeToAllocate(lmax, LCSmin);
address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
if (address == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError getting a memory address aligned on 16 bytes boundary");
return 0; // TODO DOUBLE_MIN
obidebug(1, "\nError getting a memory address aligned on a 16 bits boundary");
return 0; // TODO DOUBLE_MIN to flag error
}
}

View File

@ -1,10 +1,15 @@
/*
* sse_banded_LCS_alignment.h
*
* Created on: november 29, 2012
* Author: mercier
/****************************************************************************
* LCS alignment of two sequences header file *
****************************************************************************/
/**
* @file sse_banded_LCS_alignment.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date November 7th 2012
* @brief Header file for the functions handling the alignment of two sequences to compute their Longest Common Subsequence.
*/
#ifndef SSE_BANDED_LCS_ALIGNMENT_H_
#define SSE_BANDED_LCS_ALIGNMENT_H_
@ -15,13 +20,97 @@
#include "obiblob.h"
#define ALILEN (0) // TODO enum
/**
* @brief Macros for reference lengths to use when aligning.
*
* @since 2012
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
#define ALILEN (0)
#define MAXLEN (1)
#define MINLEN (2)
// TODO doc
int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool lcsmode);
/**
* @brief Function calculating the minimum length of the Longest Common Subsequence between two sequences to be above a chosen score threshold.
*
* @warning The first argument (lmax) must correspond to the length of the longest sequence.
*
* @param lmax The length of the longest sequence to align.
* @param lmin The length of the shortest sequence to align.
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
* @param normalize Whether the score should be normalized with the reference sequence length.
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
*
* @returns The minimum length of the Longest Common Subsequence between two sequences to be above the chosen score threshold.
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int calculateLCSmin(int lmax, int lmin, double threshold, bool normalize, int reference, bool similarity_mode);
/**
* @brief Function aligning two sequences.
*
* The alignment algorithm is a banded global alignment algorithm, a modified version of the classical Needleman and Wunsch algorithm,
* and uses indices based on the length of the Longest Common Subsequence between the two sequences.
*
* Note: the sequences do not need to be ordered (e.g. with the longest sequence as first argument).
*
* @param seq1 A pointer on the character string corresponding to the first sequence.
* @param seq2 A pointer on the character string corresponding to the second sequence.
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
* @param normalize Whether the score should be normalized with the reference sequence length.
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
* @param lcs_length A pointer on the int where the LCS length will be stored.
* @param ali_length A pointer on the int where the alignment length will be stored.
*
* @returns The alignment score (normalized according to the parameters).
*
* @since 2012
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
/**
* @brief Function aligning two sequences encoded in obiblobs.
*
* The alignment algorithm is a banded global alignment algorithm, a modified version of the classical Needleman and Wunsch algorithm,
* and uses indices based on the length of the Longest Common Subsequence between the two sequences.
*
* Note: the obiblobs do not need to be ordered (e.g. with the obiblob containing the longest sequence as first argument).
*
* @param seq1 A pointer on the blob containing the first sequence.
* @param seq2 A pointer on the blob containing the second sequence.
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
* @param normalize Whether the score should be normalized with the reference sequence length.
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
* @param lcs_length A pointer on the int where the LCS length will be stored.
* @param ali_length A pointer on the int where the alignment length will be stored.
*
* @returns The alignment score (normalized according to the parameters).
*
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
#endif

View File

@ -8,8 +8,6 @@
#include "obidmscolumn.h"
#include "obiview.h"
//#include "../libutils/utilities.h"
//#include "../libfasta/sequence.h"
inline static uchar_v hash4m128(uchar_v frag)
@ -242,7 +240,7 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t se
fprintf(stderr,"Building kmer tables...");
seq_count = (seq_col->header)->lines_used;
seq_count = (view->infos)->line_count;
// Allocate memory for the table structure
ktable = (Kmer_table_p) malloc(sizeof(Kmer_table_t) * seq_count);
@ -267,6 +265,44 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t se
}
/*
 * Builds the kmer tables of two sequence columns and returns them concatenated
 * in a single array: the tables of the first column come first, followed by the
 * tables of the second column.
 *
 * view1, seq1_col, seq1_idx: view, sequence column and index passed to
 *                            hash_seq_column() for the first set of sequences.
 * view2, seq2_col, seq2_idx: same for the second set of sequences.
 *
 * Returns a pointer on an array of (line count of view1 + line count of view2)
 * kmer tables, or NULL if building a table or allocating memory failed. On
 * failure every table built so far is released.
 */
Kmer_table_p hash_two_seq_columns(Obiview_p view1, OBIDMS_column_p seq1_col, index_t seq1_idx,
                                  Obiview_p view2, OBIDMS_column_p seq2_col, index_t seq2_idx)
{
    size_t       seq1_count;
    size_t       seq2_count;
    size_t       total_count;
    Kmer_table_p ktable1;
    Kmer_table_p ktable2;
    Kmer_table_p ktable;

    seq1_count  = (view1->infos)->line_count;
    seq2_count  = (view2->infos)->line_count;
    total_count = seq1_count + seq2_count;

    // Build the two tables then concatenate them
    ktable1 = hash_seq_column(view1, seq1_col, seq1_idx);
    if (ktable1 == NULL)
        return NULL;

    ktable2 = hash_seq_column(view2, seq2_col, seq2_idx);
    if (ktable2 == NULL)
    {
        // The first table was built successfully and must not be leaked
        free_kmer_tables(ktable1, seq1_count);
        return NULL;
    }

    // Realloc to hold the 2 tables
    ktable = realloc(ktable1, sizeof(Kmer_table_t) * total_count);
    if (ktable == NULL)
    {
        // A failed realloc leaves the original block allocated, so ktable1 still
        // has to be released. The only exception is a zero-sized request, for
        // which some implementations free the block and return NULL.
        if (total_count > 0)
            free_kmer_tables(ktable1, seq1_count);
        free_kmer_tables(ktable2, seq2_count);
        return NULL;
    }

    // Concatenate
    memcpy(ktable + seq1_count, ktable2, sizeof(Kmer_table_t) * seq2_count);

    // Free only the copied array: its entries are now owned by the merged table
    free(ktable2);

    return ktable;
}
void free_kmer_tables(Kmer_table_p ktable, size_t count)
{
size_t i;

View File

@ -18,7 +18,11 @@ typedef struct {
} Kmer_table_t, *Kmer_table_p;
// TODO doc
Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t seq_idx);
/**
 * @brief Builds the kmer tables of two sequence columns and concatenates them in one array.
 *
 * Each column is hashed with hash_seq_column(); the tables of the first column are
 * stored first, followed by the tables of the second column.
 *
 * @param view1 The view containing the first sequence column.
 * @param seq1_col The first sequence column.
 * @param seq1_idx The index passed to hash_seq_column() for the first column.
 * @param view2 The view containing the second sequence column.
 * @param seq2_col The second sequence column.
 * @param seq2_idx The index passed to hash_seq_column() for the second column.
 *
 * @returns A pointer on the concatenated array of kmer tables, containing
 *          (line count of view1 + line count of view2) entries.
 * @retval NULL if building one of the tables or allocating the concatenated array failed.
 */
Kmer_table_p hash_two_seq_columns(Obiview_p view1, OBIDMS_column_p seq1_col, index_t seq1_idx,
Obiview_p view2, OBIDMS_column_p seq2_col, index_t seq2_idx);
void align_filters(Kmer_table_p ktable, Obi_blob_p seq1, Obi_blob_p seq2, index_t idx1, index_t idx2, double threshold, bool normalize, int reference, bool similarity_mode, double* score, int* LCSmin, bool can_be_identical);
void free_kmer_tables(Kmer_table_p ktable, size_t count);

View File

@ -116,3 +116,193 @@ void* obi_get_memory_aligned_on_16(int size, int* shift)
return (memory);
}
/*
* A generic implementation of binary search for the Linux kernel
*
* Copyright (C) 2008-2009 Ksplice, Inc.
* Author: Tim Abbott <tabbott@ksplice.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; version 2.
*/
/*
 * Binary search in a sorted array, with an extra user pointer forwarded to the
 * comparison function.
 *
 * key:       object searched for.
 * base:      first element of the array, which must be sorted in the order
 *            defined by cmp.
 * num:       number of elements in the array.
 * size:      size in bytes of one element.
 * user_data: opaque pointer handed unchanged to every cmp call.
 * cmp:       called as cmp(key, element, user_data); returns a negative value
 *            if key sorts before element, a positive value if it sorts after,
 *            and 0 on a match.
 *
 * Returns a pointer to a matching element, or NULL if there is none.
 */
void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data,
                        int (*cmp)(const void *key, const void *elt, const void* user_data))
{
    // Arithmetic on void* is a compiler extension: address elements through char*
    const char* first = (const char*) base;
    const char* candidate;
    size_t      start = 0;
    size_t      end = num;
    size_t      mid;
    int         result;

    while (start < end)
    {
        // Written this way so that the midpoint computation cannot overflow
        mid = start + (end - start) / 2;
        candidate = first + mid * size;

        result = cmp(key, candidate, user_data);
        if (result < 0)
            end = mid;
        else if (result > 0)
            start = mid + 1;
        else
            return (void*) candidate;
    }

    return NULL;
}
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
*/
/* Smaller of two values; every use of an argument is parenthesized. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))

/*
 * Exchange n bytes between parmi and parmj, one TYPE at a time. The copy loop
 * always runs at least once. The counter is a size_t so that byte counts above
 * INT_MAX / LONG_MAX are not truncated.
 */
#define swapcode(TYPE, parmi, parmj, n) do {    \
    size_t i = (n) / sizeof (TYPE);             \
    TYPE *pi = (TYPE *) (parmi);                \
    TYPE *pj = (TYPE *) (parmj);                \
    do {                                        \
        TYPE t = *pi;                           \
        *pi++ = *pj;                            \
        *pj++ = t;                              \
    } while (i-- > 1);                          \
} while (0)

/*
 * Choose how elements are exchanged and store the choice in the local variable
 * swaptype:
 *   0: array aligned on long and elements exactly one long wide,
 *   1: array aligned on long and elements a multiple of long wide,
 *   2: anything else (byte by byte exchange).
 */
#define SWAPINIT(a, es) swaptype =                                  \
    ((size_t)(a) % sizeof(long) || (es) % sizeof(long)) ? 2 :      \
    ((es) == sizeof(long) ? 0 : 1)

/* Exchange n bytes between a and b using the widest unit allowed by swaptype. */
static inline void
swapfunc(char *a, char *b, size_t n, int swaptype)
{
    if (swaptype <= 1)
        swapcode(long, a, b, n);
    else
        swapcode(char, a, b, n);
}

/*
 * Exchange the two elements a and b. Relies on the local variables es (element
 * size) and swaptype of the calling function.
 */
#define swap(a, b) do {                         \
    if (swaptype == 0) {                        \
        long t = *(long *)(a);                  \
        *(long *)(a) = *(long *)(b);            \
        *(long *)(b) = t;                       \
    } else                                      \
        swapfunc((a), (b), es, swaptype);       \
} while (0)

/* Exchange two runs of n bytes; does nothing when n is 0. */
#define vecswap(a, b, n) do {                   \
    if ((n) > 0)                                \
        swapfunc((a), (b), (n), swaptype);      \
} while (0)

/* Return whichever of a, b and c holds the median value according to cmp. */
static inline char *
med3(char *a, char *b, char *c, const void *user_data, int (*cmp)(const void *, const void *, const void *))
{
    if (cmp(a, b, user_data) < 0) {
        if (cmp(b, c, user_data) < 0)
            return b;
        return (cmp(a, c, user_data) < 0) ? c : a;
    }
    if (cmp(b, c, user_data) > 0)
        return b;
    return (cmp(a, c, user_data) < 0) ? a : c;
}

/* Insertion sort of the n elements of es bytes starting at a. */
static void
insertion_sort_user_data(char *a, size_t n, size_t es, int swaptype, const void *user_data,
                         int (*cmp)(const void *, const void *, const void *))
{
    char *pl, *pm;

    if (n < 2)
        return;

    for (pm = a + es; pm < a + n * es; pm += es)
        for (pl = pm; pl > a && cmp(pl - es, pl, user_data) > 0; pl -= es)
            swap(pl, pl - es);
}

/*
 * Sort in place the n elements of es bytes starting at aa, in the ascending
 * order defined by cmp. cmp is called as cmp(elt1, elt2, user_data) and returns
 * a negative value, 0 or a positive value when elt1 sorts before, equal to or
 * after elt2; user_data is handed unchanged to every call.
 *
 * All byte counts are kept in size_t: with int counters, partitions of 2 GiB or
 * more were truncated and could be left unsorted.
 */
void
qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *))
{
    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
    size_t d, r;
    int cmp_result, swaptype, swap_cnt;
    char *a = aa;

loop:
    SWAPINIT(a, es);
    swap_cnt = 0;

    /* Small arrays are sorted directly */
    if (n < 7) {
        insertion_sort_user_data(a, n, es, swaptype, user_data, cmp);
        return;
    }

    /* Pivot choice: middle element, median of 3, or median of 3 medians */
    pm = a + (n / 2) * es;
    if (n > 7) {
        pl = a;
        pn = a + (n - 1) * es;
        if (n > 40) {
            d = (n / 8) * es;
            pl = med3(pl, pl + d, pl + 2 * d, user_data, cmp);
            pm = med3(pm - d, pm, pm + d, user_data, cmp);
            pn = med3(pn - 2 * d, pn - d, pn, user_data, cmp);
        }
        pm = med3(pl, pm, pn, user_data, cmp);
    }
    swap(a, pm);    /* the pivot now lives in the first slot */

    /*
     * Partition. Elements equal to the pivot are parked at both ends:
     * [a, pa) and (pd, end] equal, [pa, pb) smaller, (pc, pd] greater.
     */
    pa = pb = a + es;
    pc = pd = a + (n - 1) * es;
    for (;;) {
        while (pb <= pc && (cmp_result = cmp(pb, a, user_data)) <= 0) {
            if (cmp_result == 0) {
                swap_cnt = 1;
                swap(pa, pb);
                pa += es;
            }
            pb += es;
        }
        while (pb <= pc && (cmp_result = cmp(pc, a, user_data)) >= 0) {
            if (cmp_result == 0) {
                swap_cnt = 1;
                swap(pc, pd);
                pd -= es;
            }
            pc -= es;
        }
        if (pb > pc)
            break;
        swap(pb, pc);
        swap_cnt = 1;
        pb += es;
        pc -= es;
    }

    if (swap_cnt == 0) {  /* Switch to insertion sort */
        insertion_sort_user_data(a, n, es, swaptype, user_data, cmp);
        return;
    }

    /* Move the parked equal elements back to the middle */
    pn = a + n * es;
    r = MIN((size_t)(pa - a), (size_t)(pb - pa));
    vecswap(a, pb - r, r);
    r = MIN((size_t)(pd - pc), (size_t)(pn - pd) - es);
    vecswap(pb, pn - r, r);

    /* Sort the "smaller" partition recursively */
    if ((r = (size_t)(pb - pa)) > es)
        qsort_user_data(a, r / es, es, user_data, cmp);

    /* Sort the "greater" partition: iterate rather than recurse to save stack space */
    if ((r = (size_t)(pd - pc)) > es) {
        a = pn - r;
        n = r / es;
        goto loop;
    }
}

View File

@ -74,4 +74,41 @@ char* obi_format_date(time_t date);
void* obi_get_memory_aligned_on_16(int size, int* shift);
/**
 * @brief Version of binary search modified to allow the user to provide an
 * additional pointer sent to the comparison function.
 *
 * The array is searched by repeated halving, so it must be sorted in the order defined by cmp.
 *
 * @param key This is the pointer to the object that serves as key for the search, type-casted as a void*.
 * @param base This is the pointer to the first object of the array where the search is performed, type-casted as a void*.
 * @param num This is the number of elements in the array pointed by base.
 * @param size This is the size in bytes of each element in the array.
 * @param user_data This is an additional pointer passed unchanged to the comparison function.
 * @param cmp This is the function that compares the key with an element of the array, called as
 *            cmp(key, element, user_data). It returns a negative value if the key sorts before the element,
 *            a positive value if it sorts after, and 0 if they match.
 *
 * @returns A pointer to an entry in the array that matches the search key.
 * @retval NULL if key is not found.
 *
 * @since January 2017
 * @author original code modified by Celine Mercier (celine.mercier@metabarcoding.org)
 */
void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data,
int (*cmp)(const void *key, const void *elt, const void* user_data));
/**
 * @brief Version of quick sort modified to allow the user to provide an
 * additional pointer sent to the comparison function.
 *
 * The array is sorted in place, in the ascending order defined by cmp.
 *
 * @param aa This is the pointer to the first element of the array to be sorted.
 * @param n This is the number of elements in the array pointed by aa.
 * @param es This is the size in bytes of each element in the array.
 * @param user_data This is an additional pointer passed unchanged to the comparison function.
 * @param cmp This is the function that compares two elements of the array, called as
 *            cmp(element1, element2, user_data). It returns a negative value if element1 sorts before element2,
 *            a positive value if it sorts after, and 0 if they are equal.
 *
 * @since January 2017
 * @author original code modified by Celine Mercier (celine.mercier@metabarcoding.org)
 */
void qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *));
#endif /* UTILS_H_ */