Merged master fixed conflict.
This commit is contained in:
65
python/obitools3/commands/lcs.cfiles
Normal file
65
python/obitools3/commands/lcs.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/char_str_indexer.h
|
||||||
|
../../../src/char_str_indexer.c
|
||||||
|
../../../src/crc64.h
|
||||||
|
../../../src/crc64.c
|
||||||
|
../../../src/dna_seq_indexer.h
|
||||||
|
../../../src/dna_seq_indexer.c
|
||||||
|
../../../src/encode.h
|
||||||
|
../../../src/encode.c
|
||||||
|
../../../src/hashtable.h
|
||||||
|
../../../src/hashtable.c
|
||||||
|
../../../src/murmurhash2.h
|
||||||
|
../../../src/murmurhash2.c
|
||||||
|
../../../src/obi_align.h
|
||||||
|
../../../src/obi_align.c
|
||||||
|
../../../src/obiavl.h
|
||||||
|
../../../src/obiavl.c
|
||||||
|
../../../src/obiblob_indexer.h
|
||||||
|
../../../src/obiblob_indexer.c
|
||||||
|
../../../src/obiblob.h
|
||||||
|
../../../src/obiblob.c
|
||||||
|
../../../src/obidebug.h
|
||||||
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/obidms_taxonomy.c
|
||||||
|
../../../src/obidms.h
|
||||||
|
../../../src/obidms.c
|
||||||
|
../../../src/obidmscolumn_blob.c
|
||||||
|
../../../src/obidmscolumn_blob.h
|
||||||
|
../../../src/obidmscolumn_bool.c
|
||||||
|
../../../src/obidmscolumn_bool.h
|
||||||
|
../../../src/obidmscolumn_char.c
|
||||||
|
../../../src/obidmscolumn_char.h
|
||||||
|
../../../src/obidmscolumn_float.c
|
||||||
|
../../../src/obidmscolumn_float.h
|
||||||
|
../../../src/obidmscolumn_idx.h
|
||||||
|
../../../src/obidmscolumn_idx.c
|
||||||
|
../../../src/obidmscolumn_int.c
|
||||||
|
../../../src/obidmscolumn_int.h
|
||||||
|
../../../src/obidmscolumn_qual.h
|
||||||
|
../../../src/obidmscolumn_qual.c
|
||||||
|
../../../src/obidmscolumn_seq.c
|
||||||
|
../../../src/obidmscolumn_seq.h
|
||||||
|
../../../src/obidmscolumn_str.c
|
||||||
|
../../../src/obidmscolumn_str.h
|
||||||
|
../../../src/obidmscolumn.h
|
||||||
|
../../../src/obidmscolumn.c
|
||||||
|
../../../src/obidmscolumndir.h
|
||||||
|
../../../src/obidmscolumndir.c
|
||||||
|
../../../src/obierrno.h
|
||||||
|
../../../src/obierrno.c
|
||||||
|
../../../src/obilittlebigman.h
|
||||||
|
../../../src/obilittlebigman.c
|
||||||
|
../../../src/obitypes.h
|
||||||
|
../../../src/obitypes.c
|
||||||
|
../../../src/obiview.h
|
||||||
|
../../../src/obiview.c
|
||||||
|
../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../src/uint8_indexer.h
|
||||||
|
../../../src/uint8_indexer.c
|
||||||
|
../../../src/upperband.h
|
||||||
|
../../../src/upperband.c
|
||||||
|
../../../src/utils.h
|
||||||
|
../../../src/utils.c
|
@ -4,7 +4,8 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.obidms._obidms cimport OBIDMS # TODO cimport doesn't work
|
from obitools3.obidms._obidms cimport OBIDMS # TODO cimport doesn't work
|
||||||
from obitools3.utils cimport str2bytes
|
from obitools3.utils cimport str2bytes
|
||||||
|
|
||||||
from obitools3.obidms.capi.obialign cimport obi_lcs_align_one_column
|
from obitools3.obidms.capi.obialign cimport obi_lcs_align_one_column, \
|
||||||
|
obi_lcs_align_two_columns
|
||||||
|
|
||||||
|
|
||||||
import time
|
import time
|
||||||
@ -146,6 +147,13 @@ def addOptions(parser):
|
|||||||
default=False,
|
default=False,
|
||||||
help="Sequence counts are written in the output view. Default: they are not written.")
|
help="Sequence counts are written in the output view. Default: they are not written.")
|
||||||
|
|
||||||
|
group.add_argument('--thread-count','-p', # TODO should probably be in a specific option group
|
||||||
|
action="store", dest="align:threadcount",
|
||||||
|
metavar='<THREAD COUNT>',
|
||||||
|
default=1,
|
||||||
|
type=int,
|
||||||
|
help="Number of threads to use for the computation. Default: one.")
|
||||||
|
|
||||||
|
|
||||||
cpdef align(str dms_n,
|
cpdef align(str dms_n,
|
||||||
str input_view_1_n, str output_view_n,
|
str input_view_1_n, str output_view_n,
|
||||||
@ -156,12 +164,13 @@ cpdef align(str dms_n,
|
|||||||
double threshold=0.0, bint normalize=True,
|
double threshold=0.0, bint normalize=True,
|
||||||
int reference=0, bint similarity_mode=True,
|
int reference=0, bint similarity_mode=True,
|
||||||
bint print_seq=False, bint print_count=False,
|
bint print_seq=False, bint print_count=False,
|
||||||
comments="") :
|
comments="",
|
||||||
|
int thread_count=1) :
|
||||||
|
|
||||||
cdef OBIDMS d
|
cdef OBIDMS d
|
||||||
d = OBIDMS(dms_n)
|
d = OBIDMS(dms_n)
|
||||||
|
|
||||||
# Align 1 column (2 columns not implemented yet)
|
if input_view_2_n == "" and input_column_2_n == "" :
|
||||||
if obi_lcs_align_one_column(d._pointer, \
|
if obi_lcs_align_one_column(d._pointer, \
|
||||||
str2bytes(input_view_1_n), \
|
str2bytes(input_view_1_n), \
|
||||||
str2bytes(input_column_1_n), \
|
str2bytes(input_column_1_n), \
|
||||||
@ -171,6 +180,23 @@ cpdef align(str dms_n,
|
|||||||
str2bytes(comments), \
|
str2bytes(comments), \
|
||||||
print_seq, \
|
print_seq, \
|
||||||
print_count, \
|
print_count, \
|
||||||
|
threshold, normalize, reference, similarity_mode,
|
||||||
|
thread_count) < 0 :
|
||||||
|
raise Exception("Error aligning sequences")
|
||||||
|
else :
|
||||||
|
if obi_lcs_align_two_columns(d._pointer, \
|
||||||
|
str2bytes(input_view_1_n), \
|
||||||
|
str2bytes(input_view_2_n), \
|
||||||
|
str2bytes(input_column_1_n), \
|
||||||
|
str2bytes(input_column_2_n), \
|
||||||
|
str2bytes(input_elt_1_n), \
|
||||||
|
str2bytes(input_elt_2_n), \
|
||||||
|
str2bytes(id_column_1_n), \
|
||||||
|
str2bytes(id_column_2_n), \
|
||||||
|
str2bytes(output_view_n), \
|
||||||
|
str2bytes(comments), \
|
||||||
|
print_seq, \
|
||||||
|
print_count, \
|
||||||
threshold, normalize, reference, similarity_mode) < 0 :
|
threshold, normalize, reference, similarity_mode) < 0 :
|
||||||
raise Exception("Error aligning sequences")
|
raise Exception("Error aligning sequences")
|
||||||
|
|
||||||
@ -199,7 +225,8 @@ def run(config):
|
|||||||
similarity_mode = config['align']['similarity'], \
|
similarity_mode = config['align']['similarity'], \
|
||||||
print_seq = config['align']['printseq'], \
|
print_seq = config['align']['printseq'], \
|
||||||
print_count = config['align']['printcount'], \
|
print_count = config['align']['printcount'], \
|
||||||
comments = comments)
|
comments = comments, \
|
||||||
|
thread_count = config['align']['threadcount'])
|
||||||
|
|
||||||
print("Done.")
|
print("Done.")
|
||||||
|
|
||||||
|
@ -97,8 +97,7 @@ def test_set_and_get(config, infos):
|
|||||||
return
|
return
|
||||||
idx = random_int(config)
|
idx = random_int(config)
|
||||||
value = infos['random_generator'][data_type](config)
|
value = infos['random_generator'][data_type](config)
|
||||||
|
if col.nb_elements_per_line > 1 :
|
||||||
if len(element_names) > 1 :
|
|
||||||
elt = random.choice(element_names)
|
elt = random.choice(element_names)
|
||||||
col[idx][elt] = value
|
col[idx][elt] = value
|
||||||
assert col[idx][elt] == value, "Set value != gotten value "+str(col[idx][elt])+" != "+str(value)
|
assert col[idx][elt] == value, "Set value != gotten value "+str(col[idx][elt])+" != "+str(value)
|
||||||
@ -187,6 +186,7 @@ def create_random_column(config, infos) :
|
|||||||
elements_names = []
|
elements_names = []
|
||||||
for i in range(nb_elements_per_line) :
|
for i in range(nb_elements_per_line) :
|
||||||
elements_names.append(random_unique_element_name(config, infos))
|
elements_names.append(random_unique_element_name(config, infos))
|
||||||
|
elements_names = random.choice([None, elements_names])
|
||||||
name = random_unique_name(infos)
|
name = random_unique_name(infos)
|
||||||
infos['view'].add_column(name,
|
infos['view'].add_column(name,
|
||||||
alias=alias,
|
alias=alias,
|
||||||
@ -359,6 +359,8 @@ def run(config):
|
|||||||
|
|
||||||
print("Initializing the DMS and the first view...")
|
print("Initializing the DMS and the first view...")
|
||||||
|
|
||||||
|
shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
|
||||||
|
|
||||||
ini_dms_and_first_view(config, infos)
|
ini_dms_and_first_view(config, infos)
|
||||||
print_test(config, repr(infos['view']))
|
print_test(config, repr(infos['view']))
|
||||||
|
|
||||||
|
@ -7,7 +7,8 @@ from .capi.obidms cimport obi_dms, \
|
|||||||
|
|
||||||
from .capi.obidmscolumn cimport obi_close_column, \
|
from .capi.obidmscolumn cimport obi_close_column, \
|
||||||
OBIDMS_column_p, \
|
OBIDMS_column_p, \
|
||||||
OBIDMS_column_header_p
|
OBIDMS_column_header_p, \
|
||||||
|
obi_get_elements_names
|
||||||
|
|
||||||
from .capi.obiutils cimport obi_format_date
|
from .capi.obiutils cimport obi_format_date
|
||||||
|
|
||||||
@ -73,7 +74,7 @@ from .capi.obiview cimport Obiview_p, \
|
|||||||
DEFINITION_COLUMN, \
|
DEFINITION_COLUMN, \
|
||||||
QUALITY_COLUMN
|
QUALITY_COLUMN
|
||||||
|
|
||||||
from libc.stdlib cimport malloc
|
from libc.stdlib cimport malloc, free
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -100,17 +101,17 @@ cdef class OBIDMS_column :
|
|||||||
def __getitem__(self, index_t line_nb):
|
def __getitem__(self, index_t line_nb):
|
||||||
return self.get_line(line_nb)
|
return self.get_line(line_nb)
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self): # TODO discuss
|
||||||
return self.lines_used
|
return self._view.line_count
|
||||||
|
|
||||||
def __sizeof__(self):
|
def __sizeof__(self):
|
||||||
return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size)
|
return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self): # TODO discuss
|
||||||
# Declarations
|
# Declarations
|
||||||
cdef index_t line_nb
|
cdef index_t line_nb
|
||||||
# Yield each line
|
# Yield each line
|
||||||
for line_nb in range(self.lines_used):
|
for line_nb in range(self._view.line_count):
|
||||||
yield self.get_line(line_nb)
|
yield self.get_line(line_nb)
|
||||||
|
|
||||||
def __str__(self) :
|
def __str__(self) :
|
||||||
@ -138,7 +139,12 @@ cdef class OBIDMS_column :
|
|||||||
# elements_names property getter
|
# elements_names property getter
|
||||||
@property
|
@property
|
||||||
def elements_names(self):
|
def elements_names(self):
|
||||||
return (bytes2str(((self._pointer)[0].header).elements_names)).split(';')
|
cdef char* elts_names_b
|
||||||
|
cdef str elts_names
|
||||||
|
elts_names_b = obi_get_elements_names((self._pointer)[0])
|
||||||
|
elts_names = bytes2str(elts_names_b)
|
||||||
|
free(<char*>elts_names_b)
|
||||||
|
return elts_names.split(';')
|
||||||
|
|
||||||
# nb_elements_per_line property getter
|
# nb_elements_per_line property getter
|
||||||
@property
|
@property
|
||||||
@ -160,11 +166,6 @@ cdef class OBIDMS_column :
|
|||||||
def version(self):
|
def version(self):
|
||||||
return ((self._pointer)[0].header).version
|
return ((self._pointer)[0].header).version
|
||||||
|
|
||||||
# lines_used property getter
|
|
||||||
@property
|
|
||||||
def lines_used(self):
|
|
||||||
return (self._pointer)[0].header.lines_used
|
|
||||||
|
|
||||||
# comments property getter
|
# comments property getter
|
||||||
@property
|
@property
|
||||||
def comments(self):
|
def comments(self):
|
||||||
@ -268,15 +269,6 @@ cdef class OBIDMS_column_line :
|
|||||||
######################################################################################################
|
######################################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
######################################################################################################
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
######################################################################################################
|
|
||||||
|
|
||||||
|
|
||||||
cdef class OBIView_line :
|
cdef class OBIView_line :
|
||||||
|
|
||||||
def __init__(self, OBIView view, index_t line_nb) :
|
def __init__(self, OBIView view, index_t line_nb) :
|
||||||
@ -424,7 +416,7 @@ cdef class OBIDMS :
|
|||||||
cdef int i, j
|
cdef int i, j
|
||||||
cdef str column_name
|
cdef str column_name
|
||||||
|
|
||||||
view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name))
|
view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name), True)
|
||||||
view_infos_d = {}
|
view_infos_d = {}
|
||||||
view_infos_d["name"] = bytes2str(view_infos_p.name)
|
view_infos_d["name"] = bytes2str(view_infos_p.name)
|
||||||
view_infos_d["comments"] = bytes2str(view_infos_p.comments)
|
view_infos_d["comments"] = bytes2str(view_infos_p.comments)
|
||||||
|
@ -22,7 +22,7 @@ cdef class OBIDMS_column_str(OBIDMS_column):
|
|||||||
result = None
|
result = None
|
||||||
else :
|
else :
|
||||||
result = bytes2str(value)
|
result = bytes2str(value)
|
||||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
|
||||||
return result
|
return result
|
||||||
|
|
||||||
cpdef set_line(self, index_t line_nb, object value):
|
cpdef set_line(self, index_t line_nb, object value):
|
||||||
@ -46,7 +46,7 @@ cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
|
|||||||
result = None
|
result = None
|
||||||
else :
|
else :
|
||||||
result = bytes2str(value)
|
result = bytes2str(value)
|
||||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
|
||||||
return result
|
return result
|
||||||
|
|
||||||
cpdef object get_line(self, index_t line_nb) :
|
cpdef object get_line(self, index_t line_nb) :
|
||||||
@ -65,7 +65,7 @@ cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
|
|||||||
value_in_result = None
|
value_in_result = None
|
||||||
else :
|
else :
|
||||||
value_in_result = bytes2str(value)
|
value_in_result = bytes2str(value)
|
||||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
|
||||||
result[self.elements_names[i]] = value_in_result
|
result[self.elements_names[i]] = value_in_result
|
||||||
if all_NA and (value_in_result is not None) :
|
if all_NA and (value_in_result is not None) :
|
||||||
all_NA = False
|
all_NA = False
|
||||||
|
@ -18,3 +18,4 @@ cdef class OBI_Taxonomy :
|
|||||||
cdef class OBI_Taxon :
|
cdef class OBI_Taxon :
|
||||||
|
|
||||||
cdef ecotx_t* _pointer
|
cdef ecotx_t* _pointer
|
||||||
|
cdef OBI_Taxonomy _tax
|
||||||
|
@ -7,10 +7,10 @@ from .capi.obitaxonomy cimport obi_read_taxonomy, \
|
|||||||
obi_write_taxonomy, \
|
obi_write_taxonomy, \
|
||||||
obi_close_taxonomy, \
|
obi_close_taxonomy, \
|
||||||
obi_taxo_get_taxon_with_taxid, \
|
obi_taxo_get_taxon_with_taxid, \
|
||||||
obi_taxonomy_add_local_taxon, \
|
obi_taxo_add_local_taxon, \
|
||||||
|
obi_taxo_add_preferred_name_with_taxon, \
|
||||||
ecotx_t
|
ecotx_t
|
||||||
|
|
||||||
|
|
||||||
from ._obidms cimport OBIDMS
|
from ._obidms cimport OBIDMS
|
||||||
|
|
||||||
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
||||||
@ -42,7 +42,7 @@ cdef class OBI_Taxonomy :
|
|||||||
if taxon_p == NULL :
|
if taxon_p == NULL :
|
||||||
raise Exception("Taxon not found")
|
raise Exception("Taxon not found")
|
||||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||||
return OBI_Taxon(taxon_capsule)
|
return OBI_Taxon(taxon_capsule, self)
|
||||||
else :
|
else :
|
||||||
raise Exception("Not implemented")
|
raise Exception("Not implemented")
|
||||||
|
|
||||||
@ -60,7 +60,7 @@ cdef class OBI_Taxonomy :
|
|||||||
for t in range(self._pointer.taxa.count):
|
for t in range(self._pointer.taxa.count):
|
||||||
taxon_p = <ecotx_t*> (taxa+t)
|
taxon_p = <ecotx_t*> (taxa+t)
|
||||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||||
yield OBI_Taxon(taxon_capsule)
|
yield OBI_Taxon(taxon_capsule, self)
|
||||||
|
|
||||||
|
|
||||||
cpdef write(self, str prefix) :
|
cpdef write(self, str prefix) :
|
||||||
@ -70,7 +70,7 @@ cdef class OBI_Taxonomy :
|
|||||||
|
|
||||||
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
|
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
|
||||||
cdef int taxid
|
cdef int taxid
|
||||||
taxid = obi_taxonomy_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
|
taxid = obi_taxo_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
|
||||||
if taxid < 0 :
|
if taxid < 0 :
|
||||||
raise Exception("Error adding a new taxon to the taxonomy")
|
raise Exception("Error adding a new taxon to the taxonomy")
|
||||||
else :
|
else :
|
||||||
@ -85,10 +85,11 @@ cdef class OBI_Taxonomy :
|
|||||||
|
|
||||||
cdef class OBI_Taxon : # TODO dict subclass?
|
cdef class OBI_Taxon : # TODO dict subclass?
|
||||||
|
|
||||||
def __init__(self, object taxon_capsule) :
|
def __init__(self, object taxon_capsule, OBI_Taxonomy tax) :
|
||||||
self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
|
self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
|
||||||
if self._pointer == NULL :
|
if self._pointer == NULL :
|
||||||
raise Exception("Error reading the taxonomy")
|
raise Exception("Error reading a taxon (NULL pointer)")
|
||||||
|
self._tax = tax
|
||||||
|
|
||||||
# name property getter
|
# name property getter
|
||||||
@property
|
@property
|
||||||
@ -115,12 +116,23 @@ cdef class OBI_Taxon : # TODO dict subclass?
|
|||||||
def parent(self):
|
def parent(self):
|
||||||
cdef object parent_capsule
|
cdef object parent_capsule
|
||||||
parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
|
parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
|
||||||
return OBI_Taxon(parent_capsule)
|
return OBI_Taxon(parent_capsule, self._tax)
|
||||||
|
|
||||||
|
# preferred name property getter and setter
|
||||||
|
@property
|
||||||
|
def preferred_name(self):
|
||||||
|
if self._pointer.preferred_name != NULL :
|
||||||
|
return bytes2str(self._pointer.preferred_name)
|
||||||
|
@preferred_name.setter
|
||||||
|
def preferred_name(self, str new_preferred_name) : # @DuplicatedSignature
|
||||||
|
if (obi_taxo_add_preferred_name_with_taxon(self._tax._pointer, self._pointer, str2bytes(new_preferred_name)) < 0) :
|
||||||
|
raise Exception("Error adding a new preferred name to a taxon")
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
d = {}
|
d = {}
|
||||||
d['taxid'] = self.taxid
|
d['taxid'] = self.taxid
|
||||||
d['name'] = self.name
|
d['name'] = self.name
|
||||||
|
d['preferred name'] = self.preferred_name
|
||||||
d['parent'] = self.parent.taxid
|
d['parent'] = self.parent.taxid
|
||||||
d['farest'] = self.farest
|
d['farest'] = self.farest
|
||||||
return str(d)
|
return str(d)
|
||||||
|
65
python/obitools3/obidms/capi/obialign.cfiles
Normal file
65
python/obitools3/obidms/capi/obialign.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../../src/bloom.h
|
||||||
|
../../../../src/bloom.c
|
||||||
|
../../../../src/char_str_indexer.h
|
||||||
|
../../../../src/char_str_indexer.c
|
||||||
|
../../../../src/crc64.h
|
||||||
|
../../../../src/crc64.c
|
||||||
|
../../../../src/dna_seq_indexer.h
|
||||||
|
../../../../src/dna_seq_indexer.c
|
||||||
|
../../../../src/encode.h
|
||||||
|
../../../../src/encode.c
|
||||||
|
../../../../src/hashtable.h
|
||||||
|
../../../../src/hashtable.c
|
||||||
|
../../../../src/murmurhash2.h
|
||||||
|
../../../../src/murmurhash2.c
|
||||||
|
../../../../src/obi_align.h
|
||||||
|
../../../../src/obi_align.c
|
||||||
|
../../../../src/obiavl.h
|
||||||
|
../../../../src/obiavl.c
|
||||||
|
../../../../src/obiblob_indexer.h
|
||||||
|
../../../../src/obiblob_indexer.c
|
||||||
|
../../../../src/obiblob.h
|
||||||
|
../../../../src/obiblob.c
|
||||||
|
../../../../src/obidebug.h
|
||||||
|
../../../../src/obidms_taxonomy.h
|
||||||
|
../../../../src/obidms_taxonomy.c
|
||||||
|
../../../../src/obidms.h
|
||||||
|
../../../../src/obidms.c
|
||||||
|
../../../../src/obidmscolumn_blob.c
|
||||||
|
../../../../src/obidmscolumn_blob.h
|
||||||
|
../../../../src/obidmscolumn_bool.c
|
||||||
|
../../../../src/obidmscolumn_bool.h
|
||||||
|
../../../../src/obidmscolumn_char.c
|
||||||
|
../../../../src/obidmscolumn_char.h
|
||||||
|
../../../../src/obidmscolumn_float.c
|
||||||
|
../../../../src/obidmscolumn_float.h
|
||||||
|
../../../../src/obidmscolumn_idx.h
|
||||||
|
../../../../src/obidmscolumn_idx.c
|
||||||
|
../../../../src/obidmscolumn_int.c
|
||||||
|
../../../../src/obidmscolumn_int.h
|
||||||
|
../../../../src/obidmscolumn_qual.h
|
||||||
|
../../../../src/obidmscolumn_qual.c
|
||||||
|
../../../../src/obidmscolumn_seq.c
|
||||||
|
../../../../src/obidmscolumn_seq.h
|
||||||
|
../../../../src/obidmscolumn_str.c
|
||||||
|
../../../../src/obidmscolumn_str.h
|
||||||
|
../../../../src/obidmscolumn.h
|
||||||
|
../../../../src/obidmscolumn.c
|
||||||
|
../../../../src/obidmscolumndir.h
|
||||||
|
../../../../src/obidmscolumndir.c
|
||||||
|
../../../../src/obierrno.h
|
||||||
|
../../../../src/obierrno.c
|
||||||
|
../../../../src/obilittlebigman.h
|
||||||
|
../../../../src/obilittlebigman.c
|
||||||
|
../../../../src/obitypes.h
|
||||||
|
../../../../src/obitypes.c
|
||||||
|
../../../../src/obiview.h
|
||||||
|
../../../../src/obiview.c
|
||||||
|
../../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../../src/uint8_indexer.h
|
||||||
|
../../../../src/uint8_indexer.c
|
||||||
|
../../../../src/upperband.h
|
||||||
|
../../../../src/upperband.c
|
||||||
|
../../../../src/utils.h
|
||||||
|
../../../../src/utils.c
|
@ -18,5 +18,24 @@ cdef extern from "obi_align.h" nogil:
|
|||||||
double threshold,
|
double threshold,
|
||||||
bint normalize,
|
bint normalize,
|
||||||
int reference,
|
int reference,
|
||||||
bint similarity_mode)
|
bint similarity_mode,
|
||||||
|
int thread_count)
|
||||||
|
|
||||||
|
|
||||||
|
int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||||
|
const_char_p seq1_view_name,
|
||||||
|
const_char_p seq2_view_name,
|
||||||
|
const_char_p seq1_column_name,
|
||||||
|
const_char_p seq2_column_name,
|
||||||
|
const_char_p seq1_elt_name,
|
||||||
|
const_char_p seq2_elt_name,
|
||||||
|
const_char_p id1_column_name,
|
||||||
|
const_char_p id2_column_name,
|
||||||
|
const_char_p output_view_name,
|
||||||
|
const_char_p output_view_comments,
|
||||||
|
bint print_seq,
|
||||||
|
bint print_count,
|
||||||
|
double threshold,
|
||||||
|
bint normalize,
|
||||||
|
int reference,
|
||||||
|
bint similarity_mode);
|
||||||
|
65
python/obitools3/obidms/capi/obidms.cfiles
Normal file
65
python/obitools3/obidms/capi/obidms.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../../src/bloom.h
|
||||||
|
../../../../src/bloom.c
|
||||||
|
../../../../src/char_str_indexer.h
|
||||||
|
../../../../src/char_str_indexer.c
|
||||||
|
../../../../src/crc64.h
|
||||||
|
../../../../src/crc64.c
|
||||||
|
../../../../src/dna_seq_indexer.h
|
||||||
|
../../../../src/dna_seq_indexer.c
|
||||||
|
../../../../src/encode.h
|
||||||
|
../../../../src/encode.c
|
||||||
|
../../../../src/hashtable.h
|
||||||
|
../../../../src/hashtable.c
|
||||||
|
../../../../src/murmurhash2.h
|
||||||
|
../../../../src/murmurhash2.c
|
||||||
|
../../../../src/obi_align.h
|
||||||
|
../../../../src/obi_align.c
|
||||||
|
../../../../src/obiavl.h
|
||||||
|
../../../../src/obiavl.c
|
||||||
|
../../../../src/obiblob_indexer.h
|
||||||
|
../../../../src/obiblob_indexer.c
|
||||||
|
../../../../src/obiblob.h
|
||||||
|
../../../../src/obiblob.c
|
||||||
|
../../../../src/obidebug.h
|
||||||
|
../../../../src/obidms_taxonomy.h
|
||||||
|
../../../../src/obidms_taxonomy.c
|
||||||
|
../../../../src/obidms.h
|
||||||
|
../../../../src/obidms.c
|
||||||
|
../../../../src/obidmscolumn_blob.c
|
||||||
|
../../../../src/obidmscolumn_blob.h
|
||||||
|
../../../../src/obidmscolumn_bool.c
|
||||||
|
../../../../src/obidmscolumn_bool.h
|
||||||
|
../../../../src/obidmscolumn_char.c
|
||||||
|
../../../../src/obidmscolumn_char.h
|
||||||
|
../../../../src/obidmscolumn_float.c
|
||||||
|
../../../../src/obidmscolumn_float.h
|
||||||
|
../../../../src/obidmscolumn_idx.h
|
||||||
|
../../../../src/obidmscolumn_idx.c
|
||||||
|
../../../../src/obidmscolumn_int.c
|
||||||
|
../../../../src/obidmscolumn_int.h
|
||||||
|
../../../../src/obidmscolumn_qual.h
|
||||||
|
../../../../src/obidmscolumn_qual.c
|
||||||
|
../../../../src/obidmscolumn_seq.c
|
||||||
|
../../../../src/obidmscolumn_seq.h
|
||||||
|
../../../../src/obidmscolumn_str.c
|
||||||
|
../../../../src/obidmscolumn_str.h
|
||||||
|
../../../../src/obidmscolumn.h
|
||||||
|
../../../../src/obidmscolumn.c
|
||||||
|
../../../../src/obidmscolumndir.h
|
||||||
|
../../../../src/obidmscolumndir.c
|
||||||
|
../../../../src/obierrno.h
|
||||||
|
../../../../src/obierrno.c
|
||||||
|
../../../../src/obilittlebigman.h
|
||||||
|
../../../../src/obilittlebigman.c
|
||||||
|
../../../../src/obitypes.h
|
||||||
|
../../../../src/obitypes.c
|
||||||
|
../../../../src/obiview.h
|
||||||
|
../../../../src/obiview.c
|
||||||
|
../../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../../src/uint8_indexer.h
|
||||||
|
../../../../src/uint8_indexer.c
|
||||||
|
../../../../src/upperband.h
|
||||||
|
../../../../src/upperband.c
|
||||||
|
../../../../src/utils.h
|
||||||
|
../../../../src/utils.c
|
65
python/obitools3/obidms/capi/obidmscolumn.cfiles
Normal file
65
python/obitools3/obidms/capi/obidmscolumn.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../../src/bloom.h
|
||||||
|
../../../../src/bloom.c
|
||||||
|
../../../../src/char_str_indexer.h
|
||||||
|
../../../../src/char_str_indexer.c
|
||||||
|
../../../../src/crc64.h
|
||||||
|
../../../../src/crc64.c
|
||||||
|
../../../../src/dna_seq_indexer.h
|
||||||
|
../../../../src/dna_seq_indexer.c
|
||||||
|
../../../../src/encode.h
|
||||||
|
../../../../src/encode.c
|
||||||
|
../../../../src/hashtable.h
|
||||||
|
../../../../src/hashtable.c
|
||||||
|
../../../../src/murmurhash2.h
|
||||||
|
../../../../src/murmurhash2.c
|
||||||
|
../../../../src/obi_align.h
|
||||||
|
../../../../src/obi_align.c
|
||||||
|
../../../../src/obiavl.h
|
||||||
|
../../../../src/obiavl.c
|
||||||
|
../../../../src/obiblob_indexer.h
|
||||||
|
../../../../src/obiblob_indexer.c
|
||||||
|
../../../../src/obiblob.h
|
||||||
|
../../../../src/obiblob.c
|
||||||
|
../../../../src/obidebug.h
|
||||||
|
../../../../src/obidms_taxonomy.h
|
||||||
|
../../../../src/obidms_taxonomy.c
|
||||||
|
../../../../src/obidms.h
|
||||||
|
../../../../src/obidms.c
|
||||||
|
../../../../src/obidmscolumn_blob.c
|
||||||
|
../../../../src/obidmscolumn_blob.h
|
||||||
|
../../../../src/obidmscolumn_bool.c
|
||||||
|
../../../../src/obidmscolumn_bool.h
|
||||||
|
../../../../src/obidmscolumn_char.c
|
||||||
|
../../../../src/obidmscolumn_char.h
|
||||||
|
../../../../src/obidmscolumn_float.c
|
||||||
|
../../../../src/obidmscolumn_float.h
|
||||||
|
../../../../src/obidmscolumn_idx.h
|
||||||
|
../../../../src/obidmscolumn_idx.c
|
||||||
|
../../../../src/obidmscolumn_int.c
|
||||||
|
../../../../src/obidmscolumn_int.h
|
||||||
|
../../../../src/obidmscolumn_qual.h
|
||||||
|
../../../../src/obidmscolumn_qual.c
|
||||||
|
../../../../src/obidmscolumn_seq.c
|
||||||
|
../../../../src/obidmscolumn_seq.h
|
||||||
|
../../../../src/obidmscolumn_str.c
|
||||||
|
../../../../src/obidmscolumn_str.h
|
||||||
|
../../../../src/obidmscolumn.h
|
||||||
|
../../../../src/obidmscolumn.c
|
||||||
|
../../../../src/obidmscolumndir.h
|
||||||
|
../../../../src/obidmscolumndir.c
|
||||||
|
../../../../src/obierrno.h
|
||||||
|
../../../../src/obierrno.c
|
||||||
|
../../../../src/obilittlebigman.h
|
||||||
|
../../../../src/obilittlebigman.c
|
||||||
|
../../../../src/obitypes.h
|
||||||
|
../../../../src/obitypes.c
|
||||||
|
../../../../src/obiview.h
|
||||||
|
../../../../src/obiview.c
|
||||||
|
../../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../../src/uint8_indexer.h
|
||||||
|
../../../../src/uint8_indexer.c
|
||||||
|
../../../../src/upperband.h
|
||||||
|
../../../../src/upperband.c
|
||||||
|
../../../../src/utils.h
|
||||||
|
../../../../src/utils.c
|
@ -48,29 +48,6 @@ cdef extern from "obidmscolumn.h" nogil:
|
|||||||
|
|
||||||
ctypedef OBIDMS_column_t* OBIDMS_column_p
|
ctypedef OBIDMS_column_t* OBIDMS_column_p
|
||||||
|
|
||||||
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|
||||||
const_char_p column_name,
|
|
||||||
OBIType_t type,
|
|
||||||
index_t nb_lines,
|
|
||||||
index_t nb_elements_per_line,
|
|
||||||
const_char_p elements_names,
|
|
||||||
const_char_p indexer_name,
|
|
||||||
const_char_p associated_colum_name,
|
|
||||||
obiversion_t associated_colum_version,
|
|
||||||
const_char_p comments)
|
|
||||||
|
|
||||||
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
|
||||||
const_char_p column_name,
|
|
||||||
obiversion_t version_number)
|
|
||||||
|
|
||||||
int obi_close_column(OBIDMS_column_p column)
|
|
||||||
|
|
||||||
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
|
||||||
OBIDMS_column_p line_selection,
|
|
||||||
const_char_p column_name,
|
|
||||||
obiversion_t version_number,
|
|
||||||
bint clone_data)
|
|
||||||
|
|
||||||
int obi_close_column(OBIDMS_column_p column)
|
int obi_close_column(OBIDMS_column_p column)
|
||||||
|
|
||||||
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms,
|
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms,
|
||||||
@ -82,7 +59,7 @@ cdef extern from "obidmscolumn.h" nogil:
|
|||||||
|
|
||||||
int obi_close_header(OBIDMS_column_header_p header)
|
int obi_close_header(OBIDMS_column_header_p header)
|
||||||
|
|
||||||
int obi_select(OBIDMS_column_p line_selection_column, index_t line_to_grep)
|
char* obi_get_elements_names(OBIDMS_column_p column)
|
||||||
|
|
||||||
|
|
||||||
cdef extern from "obidmscolumn_int.h" nogil:
|
cdef extern from "obidmscolumn_int.h" nogil:
|
||||||
|
65
python/obitools3/obidms/capi/obierrno.cfiles
Normal file
65
python/obitools3/obidms/capi/obierrno.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../../src/bloom.h
|
||||||
|
../../../../src/bloom.c
|
||||||
|
../../../../src/char_str_indexer.h
|
||||||
|
../../../../src/char_str_indexer.c
|
||||||
|
../../../../src/crc64.h
|
||||||
|
../../../../src/crc64.c
|
||||||
|
../../../../src/dna_seq_indexer.h
|
||||||
|
../../../../src/dna_seq_indexer.c
|
||||||
|
../../../../src/encode.h
|
||||||
|
../../../../src/encode.c
|
||||||
|
../../../../src/hashtable.h
|
||||||
|
../../../../src/hashtable.c
|
||||||
|
../../../../src/murmurhash2.h
|
||||||
|
../../../../src/murmurhash2.c
|
||||||
|
../../../../src/obi_align.h
|
||||||
|
../../../../src/obi_align.c
|
||||||
|
../../../../src/obiavl.h
|
||||||
|
../../../../src/obiavl.c
|
||||||
|
../../../../src/obiblob_indexer.h
|
||||||
|
../../../../src/obiblob_indexer.c
|
||||||
|
../../../../src/obiblob.h
|
||||||
|
../../../../src/obiblob.c
|
||||||
|
../../../../src/obidebug.h
|
||||||
|
../../../../src/obidms_taxonomy.h
|
||||||
|
../../../../src/obidms_taxonomy.c
|
||||||
|
../../../../src/obidms.h
|
||||||
|
../../../../src/obidms.c
|
||||||
|
../../../../src/obidmscolumn_blob.c
|
||||||
|
../../../../src/obidmscolumn_blob.h
|
||||||
|
../../../../src/obidmscolumn_bool.c
|
||||||
|
../../../../src/obidmscolumn_bool.h
|
||||||
|
../../../../src/obidmscolumn_char.c
|
||||||
|
../../../../src/obidmscolumn_char.h
|
||||||
|
../../../../src/obidmscolumn_float.c
|
||||||
|
../../../../src/obidmscolumn_float.h
|
||||||
|
../../../../src/obidmscolumn_idx.h
|
||||||
|
../../../../src/obidmscolumn_idx.c
|
||||||
|
../../../../src/obidmscolumn_int.c
|
||||||
|
../../../../src/obidmscolumn_int.h
|
||||||
|
../../../../src/obidmscolumn_qual.h
|
||||||
|
../../../../src/obidmscolumn_qual.c
|
||||||
|
../../../../src/obidmscolumn_seq.c
|
||||||
|
../../../../src/obidmscolumn_seq.h
|
||||||
|
../../../../src/obidmscolumn_str.c
|
||||||
|
../../../../src/obidmscolumn_str.h
|
||||||
|
../../../../src/obidmscolumn.h
|
||||||
|
../../../../src/obidmscolumn.c
|
||||||
|
../../../../src/obidmscolumndir.h
|
||||||
|
../../../../src/obidmscolumndir.c
|
||||||
|
../../../../src/obierrno.h
|
||||||
|
../../../../src/obierrno.c
|
||||||
|
../../../../src/obilittlebigman.h
|
||||||
|
../../../../src/obilittlebigman.c
|
||||||
|
../../../../src/obitypes.h
|
||||||
|
../../../../src/obitypes.c
|
||||||
|
../../../../src/obiview.h
|
||||||
|
../../../../src/obiview.c
|
||||||
|
../../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../../src/uint8_indexer.h
|
||||||
|
../../../../src/uint8_indexer.c
|
||||||
|
../../../../src/upperband.h
|
||||||
|
../../../../src/upperband.c
|
||||||
|
../../../../src/utils.h
|
||||||
|
../../../../src/utils.c
|
65
python/obitools3/obidms/capi/obitaxonomy.cfiles
Normal file
65
python/obitools3/obidms/capi/obitaxonomy.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../../src/bloom.h
|
||||||
|
../../../../src/bloom.c
|
||||||
|
../../../../src/char_str_indexer.h
|
||||||
|
../../../../src/char_str_indexer.c
|
||||||
|
../../../../src/crc64.h
|
||||||
|
../../../../src/crc64.c
|
||||||
|
../../../../src/dna_seq_indexer.h
|
||||||
|
../../../../src/dna_seq_indexer.c
|
||||||
|
../../../../src/encode.h
|
||||||
|
../../../../src/encode.c
|
||||||
|
../../../../src/hashtable.h
|
||||||
|
../../../../src/hashtable.c
|
||||||
|
../../../../src/murmurhash2.h
|
||||||
|
../../../../src/murmurhash2.c
|
||||||
|
../../../../src/obi_align.h
|
||||||
|
../../../../src/obi_align.c
|
||||||
|
../../../../src/obiavl.h
|
||||||
|
../../../../src/obiavl.c
|
||||||
|
../../../../src/obiblob_indexer.h
|
||||||
|
../../../../src/obiblob_indexer.c
|
||||||
|
../../../../src/obiblob.h
|
||||||
|
../../../../src/obiblob.c
|
||||||
|
../../../../src/obidebug.h
|
||||||
|
../../../../src/obidms_taxonomy.h
|
||||||
|
../../../../src/obidms_taxonomy.c
|
||||||
|
../../../../src/obidms.h
|
||||||
|
../../../../src/obidms.c
|
||||||
|
../../../../src/obidmscolumn_blob.c
|
||||||
|
../../../../src/obidmscolumn_blob.h
|
||||||
|
../../../../src/obidmscolumn_bool.c
|
||||||
|
../../../../src/obidmscolumn_bool.h
|
||||||
|
../../../../src/obidmscolumn_char.c
|
||||||
|
../../../../src/obidmscolumn_char.h
|
||||||
|
../../../../src/obidmscolumn_float.c
|
||||||
|
../../../../src/obidmscolumn_float.h
|
||||||
|
../../../../src/obidmscolumn_idx.h
|
||||||
|
../../../../src/obidmscolumn_idx.c
|
||||||
|
../../../../src/obidmscolumn_int.c
|
||||||
|
../../../../src/obidmscolumn_int.h
|
||||||
|
../../../../src/obidmscolumn_qual.h
|
||||||
|
../../../../src/obidmscolumn_qual.c
|
||||||
|
../../../../src/obidmscolumn_seq.c
|
||||||
|
../../../../src/obidmscolumn_seq.h
|
||||||
|
../../../../src/obidmscolumn_str.c
|
||||||
|
../../../../src/obidmscolumn_str.h
|
||||||
|
../../../../src/obidmscolumn.h
|
||||||
|
../../../../src/obidmscolumn.c
|
||||||
|
../../../../src/obidmscolumndir.h
|
||||||
|
../../../../src/obidmscolumndir.c
|
||||||
|
../../../../src/obierrno.h
|
||||||
|
../../../../src/obierrno.c
|
||||||
|
../../../../src/obilittlebigman.h
|
||||||
|
../../../../src/obilittlebigman.c
|
||||||
|
../../../../src/obitypes.h
|
||||||
|
../../../../src/obitypes.c
|
||||||
|
../../../../src/obiview.h
|
||||||
|
../../../../src/obiview.c
|
||||||
|
../../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../../src/uint8_indexer.h
|
||||||
|
../../../../src/uint8_indexer.c
|
||||||
|
../../../../src/upperband.h
|
||||||
|
../../../../src/upperband.c
|
||||||
|
../../../../src/utils.h
|
||||||
|
../../../../src/utils.c
|
@ -13,6 +13,7 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
|||||||
int32_t farest
|
int32_t farest
|
||||||
ecotxnode* parent
|
ecotxnode* parent
|
||||||
char* name
|
char* name
|
||||||
|
char* preferred_name
|
||||||
|
|
||||||
ctypedef ecotxnode ecotx_t
|
ctypedef ecotxnode ecotx_t
|
||||||
|
|
||||||
@ -56,4 +57,9 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
|||||||
|
|
||||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||||
|
|
||||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||||
|
|
||||||
|
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
|
||||||
|
|
||||||
|
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
|
||||||
|
|
||||||
|
65
python/obitools3/obidms/capi/obitypes.cfiles
Normal file
65
python/obitools3/obidms/capi/obitypes.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../../src/bloom.h
|
||||||
|
../../../../src/bloom.c
|
||||||
|
../../../../src/char_str_indexer.h
|
||||||
|
../../../../src/char_str_indexer.c
|
||||||
|
../../../../src/crc64.h
|
||||||
|
../../../../src/crc64.c
|
||||||
|
../../../../src/dna_seq_indexer.h
|
||||||
|
../../../../src/dna_seq_indexer.c
|
||||||
|
../../../../src/encode.h
|
||||||
|
../../../../src/encode.c
|
||||||
|
../../../../src/hashtable.h
|
||||||
|
../../../../src/hashtable.c
|
||||||
|
../../../../src/murmurhash2.h
|
||||||
|
../../../../src/murmurhash2.c
|
||||||
|
../../../../src/obi_align.h
|
||||||
|
../../../../src/obi_align.c
|
||||||
|
../../../../src/obiavl.h
|
||||||
|
../../../../src/obiavl.c
|
||||||
|
../../../../src/obiblob_indexer.h
|
||||||
|
../../../../src/obiblob_indexer.c
|
||||||
|
../../../../src/obiblob.h
|
||||||
|
../../../../src/obiblob.c
|
||||||
|
../../../../src/obidebug.h
|
||||||
|
../../../../src/obidms_taxonomy.h
|
||||||
|
../../../../src/obidms_taxonomy.c
|
||||||
|
../../../../src/obidms.h
|
||||||
|
../../../../src/obidms.c
|
||||||
|
../../../../src/obidmscolumn_blob.c
|
||||||
|
../../../../src/obidmscolumn_blob.h
|
||||||
|
../../../../src/obidmscolumn_bool.c
|
||||||
|
../../../../src/obidmscolumn_bool.h
|
||||||
|
../../../../src/obidmscolumn_char.c
|
||||||
|
../../../../src/obidmscolumn_char.h
|
||||||
|
../../../../src/obidmscolumn_float.c
|
||||||
|
../../../../src/obidmscolumn_float.h
|
||||||
|
../../../../src/obidmscolumn_idx.h
|
||||||
|
../../../../src/obidmscolumn_idx.c
|
||||||
|
../../../../src/obidmscolumn_int.c
|
||||||
|
../../../../src/obidmscolumn_int.h
|
||||||
|
../../../../src/obidmscolumn_qual.h
|
||||||
|
../../../../src/obidmscolumn_qual.c
|
||||||
|
../../../../src/obidmscolumn_seq.c
|
||||||
|
../../../../src/obidmscolumn_seq.h
|
||||||
|
../../../../src/obidmscolumn_str.c
|
||||||
|
../../../../src/obidmscolumn_str.h
|
||||||
|
../../../../src/obidmscolumn.h
|
||||||
|
../../../../src/obidmscolumn.c
|
||||||
|
../../../../src/obidmscolumndir.h
|
||||||
|
../../../../src/obidmscolumndir.c
|
||||||
|
../../../../src/obierrno.h
|
||||||
|
../../../../src/obierrno.c
|
||||||
|
../../../../src/obilittlebigman.h
|
||||||
|
../../../../src/obilittlebigman.c
|
||||||
|
../../../../src/obitypes.h
|
||||||
|
../../../../src/obitypes.c
|
||||||
|
../../../../src/obiview.h
|
||||||
|
../../../../src/obiview.c
|
||||||
|
../../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../../src/uint8_indexer.h
|
||||||
|
../../../../src/uint8_indexer.c
|
||||||
|
../../../../src/upperband.h
|
||||||
|
../../../../src/upperband.c
|
||||||
|
../../../../src/utils.h
|
||||||
|
../../../../src/utils.c
|
65
python/obitools3/obidms/capi/obiutils.cfiles
Normal file
65
python/obitools3/obidms/capi/obiutils.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../../src/bloom.h
|
||||||
|
../../../../src/bloom.c
|
||||||
|
../../../../src/char_str_indexer.h
|
||||||
|
../../../../src/char_str_indexer.c
|
||||||
|
../../../../src/crc64.h
|
||||||
|
../../../../src/crc64.c
|
||||||
|
../../../../src/dna_seq_indexer.h
|
||||||
|
../../../../src/dna_seq_indexer.c
|
||||||
|
../../../../src/encode.h
|
||||||
|
../../../../src/encode.c
|
||||||
|
../../../../src/hashtable.h
|
||||||
|
../../../../src/hashtable.c
|
||||||
|
../../../../src/murmurhash2.h
|
||||||
|
../../../../src/murmurhash2.c
|
||||||
|
../../../../src/obi_align.h
|
||||||
|
../../../../src/obi_align.c
|
||||||
|
../../../../src/obiavl.h
|
||||||
|
../../../../src/obiavl.c
|
||||||
|
../../../../src/obiblob_indexer.h
|
||||||
|
../../../../src/obiblob_indexer.c
|
||||||
|
../../../../src/obiblob.h
|
||||||
|
../../../../src/obiblob.c
|
||||||
|
../../../../src/obidebug.h
|
||||||
|
../../../../src/obidms_taxonomy.h
|
||||||
|
../../../../src/obidms_taxonomy.c
|
||||||
|
../../../../src/obidms.h
|
||||||
|
../../../../src/obidms.c
|
||||||
|
../../../../src/obidmscolumn_blob.c
|
||||||
|
../../../../src/obidmscolumn_blob.h
|
||||||
|
../../../../src/obidmscolumn_bool.c
|
||||||
|
../../../../src/obidmscolumn_bool.h
|
||||||
|
../../../../src/obidmscolumn_char.c
|
||||||
|
../../../../src/obidmscolumn_char.h
|
||||||
|
../../../../src/obidmscolumn_float.c
|
||||||
|
../../../../src/obidmscolumn_float.h
|
||||||
|
../../../../src/obidmscolumn_idx.h
|
||||||
|
../../../../src/obidmscolumn_idx.c
|
||||||
|
../../../../src/obidmscolumn_int.c
|
||||||
|
../../../../src/obidmscolumn_int.h
|
||||||
|
../../../../src/obidmscolumn_qual.h
|
||||||
|
../../../../src/obidmscolumn_qual.c
|
||||||
|
../../../../src/obidmscolumn_seq.c
|
||||||
|
../../../../src/obidmscolumn_seq.h
|
||||||
|
../../../../src/obidmscolumn_str.c
|
||||||
|
../../../../src/obidmscolumn_str.h
|
||||||
|
../../../../src/obidmscolumn.h
|
||||||
|
../../../../src/obidmscolumn.c
|
||||||
|
../../../../src/obidmscolumndir.h
|
||||||
|
../../../../src/obidmscolumndir.c
|
||||||
|
../../../../src/obierrno.h
|
||||||
|
../../../../src/obierrno.c
|
||||||
|
../../../../src/obilittlebigman.h
|
||||||
|
../../../../src/obilittlebigman.c
|
||||||
|
../../../../src/obitypes.h
|
||||||
|
../../../../src/obitypes.c
|
||||||
|
../../../../src/obiview.h
|
||||||
|
../../../../src/obiview.c
|
||||||
|
../../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../../src/uint8_indexer.h
|
||||||
|
../../../../src/uint8_indexer.c
|
||||||
|
../../../../src/upperband.h
|
||||||
|
../../../../src/upperband.c
|
||||||
|
../../../../src/utils.h
|
||||||
|
../../../../src/utils.c
|
65
python/obitools3/obidms/capi/obiview.cfiles
Normal file
65
python/obitools3/obidms/capi/obiview.cfiles
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
../../../../src/bloom.h
|
||||||
|
../../../../src/bloom.c
|
||||||
|
../../../../src/char_str_indexer.h
|
||||||
|
../../../../src/char_str_indexer.c
|
||||||
|
../../../../src/crc64.h
|
||||||
|
../../../../src/crc64.c
|
||||||
|
../../../../src/dna_seq_indexer.h
|
||||||
|
../../../../src/dna_seq_indexer.c
|
||||||
|
../../../../src/encode.h
|
||||||
|
../../../../src/encode.c
|
||||||
|
../../../../src/hashtable.h
|
||||||
|
../../../../src/hashtable.c
|
||||||
|
../../../../src/murmurhash2.h
|
||||||
|
../../../../src/murmurhash2.c
|
||||||
|
../../../../src/obi_align.h
|
||||||
|
../../../../src/obi_align.c
|
||||||
|
../../../../src/obiavl.h
|
||||||
|
../../../../src/obiavl.c
|
||||||
|
../../../../src/obiblob_indexer.h
|
||||||
|
../../../../src/obiblob_indexer.c
|
||||||
|
../../../../src/obiblob.h
|
||||||
|
../../../../src/obiblob.c
|
||||||
|
../../../../src/obidebug.h
|
||||||
|
../../../../src/obidms_taxonomy.h
|
||||||
|
../../../../src/obidms_taxonomy.c
|
||||||
|
../../../../src/obidms.h
|
||||||
|
../../../../src/obidms.c
|
||||||
|
../../../../src/obidmscolumn_blob.c
|
||||||
|
../../../../src/obidmscolumn_blob.h
|
||||||
|
../../../../src/obidmscolumn_bool.c
|
||||||
|
../../../../src/obidmscolumn_bool.h
|
||||||
|
../../../../src/obidmscolumn_char.c
|
||||||
|
../../../../src/obidmscolumn_char.h
|
||||||
|
../../../../src/obidmscolumn_float.c
|
||||||
|
../../../../src/obidmscolumn_float.h
|
||||||
|
../../../../src/obidmscolumn_idx.h
|
||||||
|
../../../../src/obidmscolumn_idx.c
|
||||||
|
../../../../src/obidmscolumn_int.c
|
||||||
|
../../../../src/obidmscolumn_int.h
|
||||||
|
../../../../src/obidmscolumn_qual.h
|
||||||
|
../../../../src/obidmscolumn_qual.c
|
||||||
|
../../../../src/obidmscolumn_seq.c
|
||||||
|
../../../../src/obidmscolumn_seq.h
|
||||||
|
../../../../src/obidmscolumn_str.c
|
||||||
|
../../../../src/obidmscolumn_str.h
|
||||||
|
../../../../src/obidmscolumn.h
|
||||||
|
../../../../src/obidmscolumn.c
|
||||||
|
../../../../src/obidmscolumndir.h
|
||||||
|
../../../../src/obidmscolumndir.c
|
||||||
|
../../../../src/obierrno.h
|
||||||
|
../../../../src/obierrno.c
|
||||||
|
../../../../src/obilittlebigman.h
|
||||||
|
../../../../src/obilittlebigman.c
|
||||||
|
../../../../src/obitypes.h
|
||||||
|
../../../../src/obitypes.c
|
||||||
|
../../../../src/obiview.h
|
||||||
|
../../../../src/obiview.c
|
||||||
|
../../../../src/sse_banded_LCS_alignment.h
|
||||||
|
../../../../src/sse_banded_LCS_alignment.c
|
||||||
|
../../../../src/uint8_indexer.h
|
||||||
|
../../../../src/uint8_indexer.c
|
||||||
|
../../../../src/upperband.h
|
||||||
|
../../../../src/upperband.c
|
||||||
|
../../../../src/utils.h
|
||||||
|
../../../../src/utils.c
|
@ -68,7 +68,7 @@ cdef extern from "obiview.h" nogil:
|
|||||||
|
|
||||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column)
|
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column)
|
||||||
|
|
||||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
|
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bint finished)
|
||||||
|
|
||||||
int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
|
int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
|
||||||
|
|
||||||
@ -96,10 +96,6 @@ cdef extern from "obiview.h" nogil:
|
|||||||
|
|
||||||
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
|
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
|
||||||
|
|
||||||
int obi_save_view(Obiview_p view)
|
|
||||||
|
|
||||||
int obi_close_view(Obiview_p view)
|
|
||||||
|
|
||||||
int obi_save_and_close_view(Obiview_p view)
|
int obi_save_and_close_view(Obiview_p view)
|
||||||
|
|
||||||
|
|
||||||
|
1028
src/obi_align.c
1028
src/obi_align.c
File diff suppressed because it is too large
Load Diff
@ -1,12 +1,12 @@
|
|||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Sequence alignment functions header file *
|
* LCS sequence alignment functions header file *
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @file obi_align.h
|
* @file obi_align.h
|
||||||
* @author Celine Mercier
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
* @date May 11th 2016
|
* @date May 11th 2016
|
||||||
* @brief Header file for the functions handling the alignment of DNA sequences.
|
* @brief Header file for the functions handling the LCS alignment of DNA sequences.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
@ -55,7 +55,7 @@
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Aligns a NUC_SEQ column with itself.
|
* @brief Aligns an OBI_SEQ column with itself.
|
||||||
*
|
*
|
||||||
* Note: The columns where the results are written are automatically named and created.
|
* Note: The columns where the results are written are automatically named and created.
|
||||||
*
|
*
|
||||||
@ -77,7 +77,7 @@
|
|||||||
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||||
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||||
* @param normalize Whether the score should be normalized with the reference sequence length.
|
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||||
* @param reference The reference length. 0: The alignement length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
||||||
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||||
*
|
*
|
||||||
* @returns A value indicating the success of the operation.
|
* @returns A value indicating the success of the operation.
|
||||||
@ -92,18 +92,64 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
|
|||||||
const char* id_column_name,
|
const char* id_column_name,
|
||||||
const char* output_view_name, const char* output_view_comments,
|
const char* output_view_name, const char* output_view_comments,
|
||||||
bool print_seq, bool print_count,
|
bool print_seq, bool print_count,
|
||||||
double threshold, bool normalize, int reference, bool similarity_mode);
|
double threshold, bool normalize, int reference, bool similarity_mode,
|
||||||
|
int thread_count);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief
|
* @brief Aligns two OBI_SEQ columns.
|
||||||
*
|
*
|
||||||
* TODO
|
* The columns must belong to the same OBIDMS, but can belong to different views.
|
||||||
*
|
*
|
||||||
|
* Note: The columns where the results are written are automatically named and created.
|
||||||
|
*
|
||||||
|
* @param dms A pointer on an OBIDMS.
|
||||||
|
* @param seq1_view_name The name of the view where the first column to align is.
|
||||||
|
* @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one).
|
||||||
|
* @param seq1_column_name The name of the first OBI_SEQ column in the input view to align.
|
||||||
|
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
|
||||||
|
* @param seq2_column_name The name of the second OBI_SEQ column in the input view to align.
|
||||||
|
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
|
||||||
|
* @param seq1_elt_name The name of the element in the first column corresponding to the sequence to align, if the column has multiple
|
||||||
|
* elements per line.
|
||||||
|
* @param seq2_elt_name The name of the element in the second column corresponding to the sequence to align, if the column has multiple
|
||||||
|
* elements per line.
|
||||||
|
* @param id1_column_name The name of the column in the first input view containing the identifiers of the first sequence to align.
|
||||||
|
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
|
||||||
|
* @param id2_column_name The name of the column in the second input view containing the identifiers of the second sequence to align.
|
||||||
|
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
|
||||||
|
* @param output_view_name The name of the output view where the results should be written (should not already exist).
|
||||||
|
* @param output_view_comments The comments that should be associated with the output view.
|
||||||
|
* @param print_seq A boolean indicating whether the aligned sequences should be copied in the output view.
|
||||||
|
* @param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view.
|
||||||
|
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||||
|
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||||
|
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||||
|
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||||
|
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||||
|
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||||
|
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
||||||
|
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||||
|
*
|
||||||
|
* @returns A value indicating the success of the operation.
|
||||||
|
* @retval 0 if the operation was successfully completed.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since December 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,
|
int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||||
// Obiview_p score_view, OBIDMS_column_p score_column,
|
const char* seq1_view_name,
|
||||||
// double threshold, bool normalize, int reference, bool similarity_mode);
|
const char* seq2_view_name,
|
||||||
|
const char* seq1_column_name,
|
||||||
|
const char* seq2_column_name,
|
||||||
|
const char* seq1_elt_name,
|
||||||
|
const char* seq2_elt_name,
|
||||||
|
const char* id1_column_name,
|
||||||
|
const char* id2_column_name,
|
||||||
|
const char* output_view_name, const char* output_view_comments,
|
||||||
|
bool print_seq, bool print_count,
|
||||||
|
double threshold, bool normalize, int reference, bool similarity_mode);
|
||||||
|
|
||||||
|
|
||||||
#endif /* OBI_ALIGN_H_ */
|
#endif /* OBI_ALIGN_H_ */
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
|||||||
* @file obidms_taxonomy.h
|
* @file obidms_taxonomy.h
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
* @date March 2nd 2016
|
* @date March 2nd 2016
|
||||||
* @brief Header file for the functions handling the reading of binary taxonomy files.
|
* @brief Header file for the functions handling the reading and writing of taxonomy files.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
@ -17,105 +17,384 @@
|
|||||||
#include "obidms.h"
|
#include "obidms.h"
|
||||||
|
|
||||||
|
|
||||||
#define MIN_LOCAL_TAXID (10000000)
|
#define MIN_LOCAL_TAXID (10000000) /**< The minimum taxid for a taxon added locally (i.e. not an NCBI taxon).
|
||||||
#define TAX_NAME_LEN (1024)
|
*/
|
||||||
|
#define TAX_NAME_LEN (1024) /**< The maximum length for the taxonomy name.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for a taxon as stored in a .tdx file.
|
||||||
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int32_t taxid;
|
int32_t taxid; /**< Taxid.
|
||||||
int32_t rank;
|
*/
|
||||||
int32_t parent;
|
int32_t rank; /**< Rank index.
|
||||||
int32_t name_length;
|
*/
|
||||||
char name[1];
|
int32_t parent; /**< Index, in the taxid index, of the parent node in the taxonomic tree.
|
||||||
|
*/
|
||||||
|
int32_t name_length; /**< Length of the taxon scientific name.
|
||||||
|
*/
|
||||||
|
char name[]; /**< Scientific name of the taxon.
|
||||||
|
*/
|
||||||
} ecotxformat_t;
|
} ecotxformat_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for a taxon as stored in a taxonomy structure.
|
||||||
|
*/
|
||||||
typedef struct ecotxnode {
|
typedef struct ecotxnode {
|
||||||
int32_t taxid;
|
int32_t taxid; /**< Taxid. // TODO discuss that this will be the current taxid even if the struct was accessed through a deprecated one
|
||||||
int32_t rank;
|
*/
|
||||||
int32_t farest;
|
int32_t rank; /**< Rank index in ecorankidx_t structure.
|
||||||
int32_t idx;
|
*/
|
||||||
struct ecotxnode* parent;
|
int32_t farest; /**< Longest branch length, used to compute distances between taxa faster.
|
||||||
char* name;
|
*/
|
||||||
bool local;
|
int32_t idx; /**< Index in the ecotxidx_t structure.
|
||||||
|
*/
|
||||||
|
struct ecotxnode* parent; /**< Pointer on the parent node in the taxonomic tree.
|
||||||
|
*/
|
||||||
|
char* name; /**< Scientific name of the taxon.
|
||||||
|
*/
|
||||||
|
char* preferred_name; /**< Preferred name of the taxon if there is one, otherwise NULL.
|
||||||
|
*/
|
||||||
|
bool local; /**< A boolean indicating whether the taxon is local or not.
|
||||||
|
*/
|
||||||
} ecotx_t;
|
} ecotx_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for the taxon index in a taxonomy structure.
|
||||||
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int32_t count;
|
int32_t count; /**< Number of taxa.
|
||||||
int32_t ncbi_count;
|
*/
|
||||||
int32_t local_count;
|
int32_t ncbi_count; /**< Number of NCBI taxa.
|
||||||
int32_t max_taxid;
|
*/
|
||||||
int32_t buffer_size;
|
int32_t local_count; /**< Number of taxa added locally.
|
||||||
ecotx_t taxon[1];
|
*/
|
||||||
|
int32_t max_taxid; /**< Maximum taxid existing in the taxon index.
|
||||||
|
*/
|
||||||
|
int32_t buffer_size; /**< Number of taxa. // TODO kept this but not sure of its use
|
||||||
|
*/
|
||||||
|
ecotx_t taxon[]; /**< Taxon array.
|
||||||
|
*/
|
||||||
} ecotxidx_t;
|
} ecotxidx_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for the rank index in a taxonomy structure.
|
||||||
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int32_t count;
|
int32_t count; /**< Number of ranks.
|
||||||
char* label[1];
|
*/
|
||||||
|
char* label[]; /**< Array of rank names.
|
||||||
|
*/
|
||||||
} ecorankidx_t;
|
} ecorankidx_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for a taxon name as stored in a .ndx file.
|
||||||
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int32_t is_scientific_name;
|
int32_t is_scientific_name; /**< A boolean indicating whether the name is a scientific name or not.
|
||||||
int32_t name_length;
|
*/
|
||||||
int32_t class_length;
|
int32_t name_length; /**< The name length.
|
||||||
int32_t taxid; // taxid idx
|
*/
|
||||||
char names[1];
|
int32_t class_length; /**< The name class length.
|
||||||
|
*/
|
||||||
|
int32_t taxid; /**< Index of the taxon in the taxid index.
|
||||||
|
*/
|
||||||
|
char names[]; /**< Taxon name and name class concatenated.
|
||||||
|
*/
|
||||||
} econameformat_t;
|
} econameformat_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for a taxon name as stored in a taxonomy structure.
|
||||||
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char* name;
|
char* name; /**< Taxon name.
|
||||||
char* class_name;
|
*/
|
||||||
int32_t is_scientific_name;
|
char* class_name; /**< Name class.
|
||||||
struct ecotxnode* taxon;
|
*/
|
||||||
|
int32_t is_scientific_name; /**< A boolean indicating whether the name is a scientific name or not.
|
||||||
|
*/
|
||||||
|
struct ecotxnode* taxon; /**< Pointer on the taxon in the taxon index.
|
||||||
|
*/
|
||||||
} econame_t;
|
} econame_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for the name index in a taxonomy structure.
|
||||||
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int32_t count;
|
int32_t count; /**< Number of names.
|
||||||
econame_t names[1];
|
*/
|
||||||
|
econame_t names[]; /**< Array of names.
|
||||||
|
*/
|
||||||
} econameidx_t;
|
} econameidx_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for a taxid/index pair as stored in a taxonomy structure.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
int32_t taxid; /**< Taxid.
|
||||||
|
*/
|
||||||
|
int32_t idx; /**< Index of the taxid in the taxon index, -1 if the taxid is deprecated.
|
||||||
|
*/
|
||||||
|
} ecomerged_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for a merged taxid index in a taxonomy structure.
|
||||||
|
*
|
||||||
|
* This index includes all deprecated taxids that now refer to different taxids, and
|
||||||
|
* the deprecated taxids that are deleted.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
int32_t count; /**< Number of taxid/index pairs.
|
||||||
|
*/
|
||||||
|
ecomerged_t merged[]; /**< Array of taxid/index pairs.
|
||||||
|
*/
|
||||||
|
} ecomergedidx_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Structure for a taxonomy.
|
||||||
|
*/
|
||||||
typedef struct OBIDMS_taxonomy_t {
|
typedef struct OBIDMS_taxonomy_t {
|
||||||
char tax_name[TAX_NAME_LEN];
|
char tax_name[TAX_NAME_LEN]; /**< Taxonomy name.
|
||||||
OBIDMS_p dms;
|
*/
|
||||||
ecorankidx_t* ranks;
|
OBIDMS_p dms; /**< A pointer on the DMS to which the taxonomy belongs.
|
||||||
econameidx_t* names;
|
*/
|
||||||
ecotxidx_t* taxa;
|
ecomergedidx_t* merged_idx; /**< Merged taxid index.
|
||||||
|
*/
|
||||||
|
ecorankidx_t* ranks; /**< Taxonomic ranks.
|
||||||
|
*/
|
||||||
|
econameidx_t* names; /**< Taxon names.
|
||||||
|
*/
|
||||||
|
econameidx_t* preferred_names; /**< Taxon preferred names (i.e. added locally).
|
||||||
|
*/
|
||||||
|
ecotxidx_t* taxa; /**< Taxa.
|
||||||
|
*/
|
||||||
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
||||||
|
|
||||||
|
|
||||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
|
/**
|
||||||
|
* @brief Function reading an NCBI taxdump and loading its information into a taxonomy structure.
|
||||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
|
*
|
||||||
|
* @param taxdump The path to the taxdump directory.
|
||||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
|
*
|
||||||
|
* @returns A pointer on the read taxonomy structure.
|
||||||
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
* @retval NULL if an error occurred.
|
||||||
|
*
|
||||||
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
|
* @since 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
*/
|
||||||
|
|
||||||
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
||||||
|
|
||||||
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
||||||
|
|
||||||
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
||||||
|
|
||||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
|
||||||
|
|
||||||
|
|
||||||
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
|
|
||||||
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
|
|
||||||
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
|
|
||||||
|
|
||||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
|
||||||
|
|
||||||
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
||||||
|
|
||||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
|
||||||
|
/**
|
||||||
|
* @brief Function reading a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files)
|
||||||
|
* and loading its information into a taxonomy structure.
|
||||||
|
*
|
||||||
|
* @param dms A pointer on the DMS to which the taxonomy belongs.
|
||||||
|
* @param taxonomy_name The name (prefix) of the taxonomy.
|
||||||
|
* @param read_alternative_names A boolean indicating whether names other than scientific and preferred names should be read.
|
||||||
|
*
|
||||||
|
* @returns A pointer on the read taxonomy structure.
|
||||||
|
* @retval NULL if an error occurred.
|
||||||
|
*
|
||||||
|
* @since 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function writing a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files).
|
||||||
|
*
|
||||||
|
* @param dms A pointer on the DMS to which the taxonomy belongs.
|
||||||
|
* @param tax A pointer on the taxonomy structure.
|
||||||
|
* @param tax_name The name (prefix) of the taxonomy.
|
||||||
|
*
|
||||||
|
* @returns An integer value indicating the success of the operation.
|
||||||
|
* @retval 0 on success.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function closing a taxonomy structure.
|
||||||
|
*
|
||||||
|
* This function writes all changes to the binary files (local taxa and preferred names) and frees all allocated memory for the structure.
|
||||||
|
*
|
||||||
|
* @param taxonomy A pointer on the taxonomy structure.
|
||||||
|
*
|
||||||
|
* @returns An integer value indicating the success of the operation.
|
||||||
|
* @retval 0 on success.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function adding a local taxon to a taxonomy.
|
||||||
|
*
|
||||||
|
* @param tax A pointer on the taxonomy structure.
|
||||||
|
* @param name The taxon scientific name.
|
||||||
|
* @param rank_name The taxon rank name.
|
||||||
|
* @param parent_taxid The taxid of the parent node in the taxonomic tree.
|
||||||
|
* @param min_taxid The minimum taxid to give to the new taxon (the function will choose a new taxid >= min_taxid and >= MIN_LOCAL_TAXID).
|
||||||
|
*
|
||||||
|
* @returns An integer value indicating the success of the operation.
|
||||||
|
* @retval 0 on success.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function adding a preferred name to a taxon in a taxonomy, referred to by its taxid.
|
||||||
|
*
|
||||||
|
* @param tax A pointer on the taxonomy structure.
|
||||||
|
* @param taxid The taxid of the taxon that should have a new preferred name.
|
||||||
|
* @param preferred_name The new preferred name.
|
||||||
|
*
|
||||||
|
* @returns An integer value indicating the success of the operation.
|
||||||
|
* @retval 0 on success.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function adding a preferred name to a taxon in a taxonomy, referred to by the taxon pointer.
|
||||||
|
*
|
||||||
|
* @param tax A pointer on the taxonomy structure.
|
||||||
|
* @param taxon A pointer on the taxon that should have a new preferred name.
|
||||||
|
* @param preferred_name The new preferred name.
|
||||||
|
*
|
||||||
|
* @returns An integer value indicating the success of the operation.
|
||||||
|
* @retval 0 on success.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function returning the parent of a taxon at a given rank.
|
||||||
|
*
|
||||||
|
* @param taxon A pointer on the taxon.
|
||||||
|
* @param rankidx The index of the rank wanted.
|
||||||
|
*
|
||||||
|
* @returns A pointer on the parent taxon at the wanted rank.
|
||||||
|
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||||
|
*/
|
||||||
|
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function returning a taxon given its taxid.
|
||||||
|
*
|
||||||
|
* @param taxonomy A pointer on the taxonomy.
|
||||||
|
* @param taxid The taxid of the taxon.
|
||||||
|
*
|
||||||
|
* @returns A pointer on the wanted taxon.
|
||||||
|
* @retval NULL if no taxon was found with the given taxid.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function checking whether a taxon is under another in the taxonomy tree.
|
||||||
|
*
|
||||||
|
* @param taxon A pointer on the first taxon.
|
||||||
|
* @param other_taxid The taxid of the second taxon.
|
||||||
|
*
|
||||||
|
* @returns A boolean indicating whether the first taxon is under the second taxon in the taxonomy tree.
|
||||||
|
*/
|
||||||
|
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function returning the parent of a taxon at the species level.
|
||||||
|
*
|
||||||
|
* @param taxon A pointer on the taxon.
|
||||||
|
* @param taxonomy A pointer on the taxonomy structure.
|
||||||
|
*
|
||||||
|
* @returns A pointer on the parent taxon at the species level.
|
||||||
|
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||||
|
*/
|
||||||
|
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function returning the parent of a taxon at the genus level.
|
||||||
|
*
|
||||||
|
* @param taxon A pointer on the taxon.
|
||||||
|
* @param taxonomy A pointer on the taxonomy structure.
|
||||||
|
*
|
||||||
|
* @returns A pointer on the parent taxon at the genus level.
|
||||||
|
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||||
|
*/
|
||||||
|
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function returning the parent of a taxon at the family level.
|
||||||
|
*
|
||||||
|
* @param taxon A pointer on the taxon.
|
||||||
|
* @param taxonomy A pointer on the taxonomy structure.
|
||||||
|
*
|
||||||
|
* @returns A pointer on the parent taxon at the family level.
|
||||||
|
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||||
|
*/
|
||||||
|
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function returning the parent of a taxon at the kingdom level.
|
||||||
|
*
|
||||||
|
* @param taxon A pointer on the taxon.
|
||||||
|
* @param taxonomy A pointer on the taxonomy structure.
|
||||||
|
*
|
||||||
|
* @returns A pointer on the parent taxon at the kingdom level.
|
||||||
|
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||||
|
*/
|
||||||
|
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function returning the parent of a taxon at the superkingdom level.
|
||||||
|
*
|
||||||
|
* @param taxon A pointer on the taxon.
|
||||||
|
* @param taxonomy A pointer on the taxonomy structure.
|
||||||
|
*
|
||||||
|
* @returns A pointer on the parent taxon at the superkingdom level.
|
||||||
|
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||||
|
*/
|
||||||
|
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||||
|
|
||||||
|
@ -119,7 +119,7 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Internal function building the default elements names of the lines of a
|
* @brief Internal function building the default elements names of the lines of a
|
||||||
* column (i.e. "0;1;2;...;n").
|
* column, with ';' as separator (i.e. "0;1;2;...;n\0").
|
||||||
*
|
*
|
||||||
* @warning The returned pointer has to be freed by the caller.
|
* @warning The returned pointer has to be freed by the caller.
|
||||||
*
|
*
|
||||||
@ -134,12 +134,61 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
|
|||||||
static char* build_default_elements_names(index_t nb_elements_per_line);
|
static char* build_default_elements_names(index_t nb_elements_per_line);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function formatting the elements names of the lines of a
|
||||||
|
* column with '\0' as separator (e.g. "0\01\02\0...\0n\0").
|
||||||
|
*
|
||||||
|
* @param elements_names The character string formatted with ';' as separator (e.g. "0;1;2;...;n\0").
|
||||||
|
* @param elts_names_length A pointer on an integer where the function will store the length of the character string.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static void format_elements_names(char* elements_names, int* elts_names_length);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function comparing two element names using their sorted index, using data stored in the column header.
|
||||||
|
*
|
||||||
|
* @param n1_sort_idx A pointer on the sorted index of the first name.
|
||||||
|
* @param n2_sort_idx A pointer on the sorted index of the second name.
|
||||||
|
* @param h A pointer on the column header.
|
||||||
|
*
|
||||||
|
* @returns A value < 0 if name1 < name2,
|
||||||
|
* a value > 0 if name1 > name2,
|
||||||
|
* and 0 if name1 == name2.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function comparing two element names using a pointer on the first name and the sorted index of the second name,
|
||||||
|
* using data stored in the column header.
|
||||||
|
*
|
||||||
|
* @param name1 A pointer on the first name.
|
||||||
|
* @param n2_sort_idx A pointer on the sorted index of the second name.
|
||||||
|
* @param h A pointer on the column header.
|
||||||
|
*
|
||||||
|
* @returns A value < 0 if name1 < name2,
|
||||||
|
* a value > 0 if name1 > name2,
|
||||||
|
* and 0 if name1 == name2.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Internal function setting the elements names of the lines of a
|
* @brief Internal function setting the elements names of the lines of a
|
||||||
* column in the header of the OBIDMS column structure.
|
* column in the header of the OBIDMS column structure.
|
||||||
*
|
*
|
||||||
* @param column A pointer as returned by obi_create_column().
|
* @param column A pointer as returned by obi_create_column().
|
||||||
* @param elements_names The names of the elements with ';' as separator.
|
* @param elements_names The names of the elements as formatted by format_elements_names().
|
||||||
|
* @param elts_names_length The length of elements_names.
|
||||||
*
|
*
|
||||||
* @retval 0 if the operation was successfully completed.
|
* @retval 0 if the operation was successfully completed.
|
||||||
* @retval -1 if an error occurred.
|
* @retval -1 if an error occurred.
|
||||||
@ -147,7 +196,35 @@ static char* build_default_elements_names(index_t nb_elements_per_line);
|
|||||||
* @since July 2015
|
* @since July 2015
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
static int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names);
|
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function counting the number of elements names in a character array.
|
||||||
|
*
|
||||||
|
* @param elements_names A pointer on the character string corresponding to the elements names,
|
||||||
|
* formatted with ';' or with '\0' as separator.
|
||||||
|
* @param elt_names_formatted Whether the separator is ';' (false), or '\0' (true, as formatted by format_elements_names()).
|
||||||
|
*
|
||||||
|
* @returns The number of elements names in the character array.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function computing the length of a character array containing elements names as formatted by format_elements_names().
|
||||||
|
*
|
||||||
|
* @param elements_names A pointer on the character string corresponding to the elements names as formatted by format_elements_names().
|
||||||
|
*
|
||||||
|
* @returns The length of the character array containing the formatted elements names.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static int get_formatted_elt_names_length(const char* elements_names);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -198,6 +275,7 @@ static char* build_column_file_name(const char* column_name, obiversion_t versio
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static char* build_version_file_name(const char* column_name)
|
static char* build_version_file_name(const char* column_name)
|
||||||
{
|
{
|
||||||
char* file_name;
|
char* file_name;
|
||||||
@ -222,6 +300,7 @@ static char* build_version_file_name(const char* column_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block)
|
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block)
|
||||||
{
|
{
|
||||||
off_t loc_size;
|
off_t loc_size;
|
||||||
@ -346,6 +425,7 @@ static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory)
|
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory)
|
||||||
{
|
{
|
||||||
off_t loc_size;
|
off_t loc_size;
|
||||||
@ -437,10 +517,12 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static char* build_default_elements_names(index_t nb_elements_per_line)
|
static char* build_default_elements_names(index_t nb_elements_per_line)
|
||||||
{
|
{
|
||||||
char* elements_names;
|
char* elements_names;
|
||||||
int i;
|
int i;
|
||||||
|
int len;
|
||||||
|
|
||||||
elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
|
elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
|
||||||
if (elements_names == NULL)
|
if (elements_names == NULL)
|
||||||
@ -457,31 +539,169 @@ static char* build_default_elements_names(index_t nb_elements_per_line)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
len = 0;
|
||||||
for (i = 0; i < nb_elements_per_line; i++)
|
for (i = 0; i < nb_elements_per_line; i++)
|
||||||
sprintf(elements_names, "%d", i);
|
len += sprintf(elements_names+len, "%d;", i);
|
||||||
|
|
||||||
// Terminal character
|
// Terminal character
|
||||||
elements_names[strlen(elements_names)] = '\0';
|
elements_names[len-1] = '\0'; // -1 to delete last ';'
|
||||||
|
len--;
|
||||||
|
|
||||||
return elements_names;
|
return elements_names;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names)
|
|
||||||
|
static void format_elements_names(char* elements_names, int* elts_names_length)
|
||||||
{
|
{
|
||||||
if (strlen(elements_names) > ELEMENTS_NAMES_MAX)
|
int i;
|
||||||
|
|
||||||
|
*elts_names_length = strlen(elements_names);
|
||||||
|
|
||||||
|
// Replace the ';' with '\0'
|
||||||
|
for (i=0; i < *elts_names_length; i++)
|
||||||
|
{
|
||||||
|
if (elements_names[i] == ';')
|
||||||
|
elements_names[i] = '\0';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h)
|
||||||
|
{
|
||||||
|
char* name1=NULL;
|
||||||
|
char* name2=NULL;
|
||||||
|
|
||||||
|
int name1_idx;
|
||||||
|
int name2_idx;
|
||||||
|
|
||||||
|
int name1_sort_idx = *((int*)n1_sort_idx);
|
||||||
|
int name2_sort_idx = *((int*)n2_sort_idx);
|
||||||
|
OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
|
||||||
|
|
||||||
|
name1_idx = (header->elements_names_idx)[name1_sort_idx];
|
||||||
|
name1 = (header->elements_names)+name1_idx;
|
||||||
|
|
||||||
|
name2_idx = (header->elements_names_idx)[name2_sort_idx];
|
||||||
|
name2 = (header->elements_names)+name2_idx;
|
||||||
|
|
||||||
|
return strcmp(name1, name2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h)
|
||||||
|
{
|
||||||
|
char* name2=NULL;
|
||||||
|
int name2_idx;
|
||||||
|
|
||||||
|
int name2_sort_idx = *((int*)n2_sort_idx);
|
||||||
|
OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
|
||||||
|
|
||||||
|
name2_idx = (header->elements_names_idx)[name2_sort_idx];
|
||||||
|
name2 = (header->elements_names)+name2_idx;
|
||||||
|
|
||||||
|
return strcmp(name1, name2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
// Check that the elements names are not too long
|
||||||
|
if (elts_names_length+2 > ELEMENTS_NAMES_MAX)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX);
|
obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
strcpy((column->header)->elements_names, elements_names);
|
// Copy the elements names in the header
|
||||||
|
memcpy((column->header)->elements_names, elements_names, elts_names_length*sizeof(char));
|
||||||
|
|
||||||
|
// Terminal characters
|
||||||
|
(column->header)->elements_names[elts_names_length] = '\0';
|
||||||
|
(column->header)->elements_names[elts_names_length + 1] = '\0';
|
||||||
|
|
||||||
|
// Store the length of the character array containing the elements names
|
||||||
|
(column->header)->elements_names_length = elts_names_length;
|
||||||
|
|
||||||
|
// Build the elements names index
|
||||||
|
i = 0;
|
||||||
|
j = 0;
|
||||||
|
// Index the first element name
|
||||||
|
((column->header)->elements_names_idx)[j] = i;
|
||||||
|
((column->header)->sorted_elements_idx)[j] = j;
|
||||||
|
i++;
|
||||||
|
j++;
|
||||||
|
while (i < elts_names_length)
|
||||||
|
{
|
||||||
|
if (elements_names[i] == '\0')
|
||||||
|
{ // Index new element name
|
||||||
|
((column->header)->elements_names_idx)[j] = i+1;
|
||||||
|
((column->header)->sorted_elements_idx)[j] = j;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the sorted index
|
||||||
|
qsort_user_data((column->header)->sorted_elements_idx, j, sizeof(int), column->header, cmp_elements_names_with_idx);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
|
|
||||||
|
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted)
|
||||||
|
{
|
||||||
|
char sep;
|
||||||
|
int i = 0;
|
||||||
|
bool stop = false;
|
||||||
|
index_t count = 0;
|
||||||
|
|
||||||
|
if (elt_names_formatted)
|
||||||
|
sep = FORMATTED_ELT_NAMES_SEPARATOR;
|
||||||
|
else
|
||||||
|
sep = NOT_FORMATTED_ELT_NAMES_SEPARATOR;
|
||||||
|
|
||||||
|
while (! stop)
|
||||||
|
{
|
||||||
|
if ((elt_names_formatted && (elements_names[i] == '\0') && (elements_names[i+1] == '\0')) ||
|
||||||
|
((! elt_names_formatted) && (elements_names[i] == '\0')))
|
||||||
|
stop = true;
|
||||||
|
if ((elements_names[i] == sep) || (elements_names[i] == '\0'))
|
||||||
|
count++;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Computes the length of a character array of elements names using '\0' as
 * separator and ending with two consecutive '\0' characters.
 *
 * The value returned is the position of the first '\0' that is directly
 * followed by another '\0'.
 */
static int get_formatted_elt_names_length(const char* elements_names)
{
	int pos = 0;

	// Move forward until a '\0' directly followed by another '\0' is found
	// (the following character is only read when the current one is '\0')
	while ((elements_names[pos] != '\0') || (elements_names[pos+1] != '\0'))
		pos++;

	return pos;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
|
||||||
{
|
{
|
||||||
return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line);
|
return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line);
|
||||||
}
|
}
|
||||||
@ -493,6 +713,7 @@ index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_lin
|
|||||||
*
|
*
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory)
|
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory)
|
||||||
{
|
{
|
||||||
off_t loc_size;
|
off_t loc_size;
|
||||||
@ -557,6 +778,7 @@ obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_dire
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name)
|
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name)
|
||||||
{
|
{
|
||||||
OBIDMS_column_directory_p column_directory;
|
OBIDMS_column_directory_p column_directory;
|
||||||
@ -582,6 +804,7 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
size_t obi_get_platform_header_size()
|
size_t obi_get_platform_header_size()
|
||||||
{
|
{
|
||||||
size_t header_size;
|
size_t header_size;
|
||||||
@ -607,7 +830,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
const char* indexer_name,
|
const char* indexer_name,
|
||||||
const char* associated_column_name,
|
const char* associated_column_name,
|
||||||
obiversion_t associated_column_version,
|
obiversion_t associated_column_version,
|
||||||
const char* comments
|
const char* comments,
|
||||||
|
bool elt_names_formatted
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
OBIDMS_column_p new_column;
|
OBIDMS_column_p new_column;
|
||||||
@ -623,6 +847,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
OBIType_t returned_data_type;
|
OBIType_t returned_data_type;
|
||||||
OBIType_t stored_data_type;
|
OBIType_t stored_data_type;
|
||||||
char* final_indexer_name;
|
char* final_indexer_name;
|
||||||
|
char* built_elements_names = NULL;
|
||||||
|
int elts_names_length;
|
||||||
|
|
||||||
new_column = NULL;
|
new_column = NULL;
|
||||||
|
|
||||||
@ -695,31 +921,29 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
else if (nb_lines < minimum_line_count)
|
else if (nb_lines < minimum_line_count)
|
||||||
nb_lines = minimum_line_count;
|
nb_lines = minimum_line_count;
|
||||||
|
|
||||||
// Check and build if needed the element names
|
// Check, format, and build if needed the element names
|
||||||
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0)) // Build the default element names: str of the element index
|
if ((elements_names == NULL) || (*elements_names == '\0')) // Build the default element names: str of the element index
|
||||||
{
|
{
|
||||||
elements_names = build_default_elements_names(nb_elements_per_line);
|
built_elements_names = build_default_elements_names(nb_elements_per_line);
|
||||||
if (elements_names == NULL)
|
if (built_elements_names == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
elements_names = built_elements_names;
|
||||||
}
|
}
|
||||||
else if (((elements_names == NULL) || (strcmp(elements_names, "") != 0)) && (nb_elements_per_line > 1))
|
else
|
||||||
{ // The number of elements names should be equal to the number of elements per line
|
{ // The number of elements names should be equal to the number of elements per line
|
||||||
char* token;
|
if (check_elt_names_count(elements_names, elt_names_formatted) != nb_elements_per_line)
|
||||||
index_t n = 0;
|
|
||||||
token = strdup(elements_names);
|
|
||||||
token = strtok(token, ";");
|
|
||||||
while (token != NULL)
|
|
||||||
{
|
{
|
||||||
token = strtok(NULL, ";");
|
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line:"
|
||||||
n++;
|
"\n%lld elements per line\nelements names:%s\n", nb_elements_per_line, elements_names);
|
||||||
}
|
|
||||||
if (n != nb_elements_per_line)
|
|
||||||
{
|
|
||||||
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// TODO what if 1 element and name specified? doc
|
|
||||||
|
// Format the elements names string
|
||||||
|
if (! elt_names_formatted)
|
||||||
|
format_elements_names(elements_names, &elts_names_length);
|
||||||
|
else
|
||||||
|
elts_names_length = get_formatted_elt_names_length(elements_names);
|
||||||
|
|
||||||
// Calculate the size needed
|
// Calculate the size needed
|
||||||
header_size = obi_get_platform_header_size();
|
header_size = obi_get_platform_header_size();
|
||||||
@ -816,11 +1040,11 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
header->version = version_number;
|
header->version = version_number;
|
||||||
header->cloned_from = -1;
|
header->cloned_from = -1;
|
||||||
|
|
||||||
obi_column_set_elements_names(new_column, elements_names);
|
set_elements_names(new_column, elements_names, elts_names_length);
|
||||||
|
|
||||||
// Free the element names if they were built
|
// Free the element names if they were built
|
||||||
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0))
|
if (built_elements_names != NULL)
|
||||||
free(elements_names);
|
free(built_elements_names);
|
||||||
|
|
||||||
strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME);
|
strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME);
|
||||||
|
|
||||||
@ -886,6 +1110,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||||
const char* column_name,
|
const char* column_name,
|
||||||
obiversion_t version_number)
|
obiversion_t version_number)
|
||||||
@ -1043,6 +1268,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
||||||
OBIDMS_column_p line_selection,
|
OBIDMS_column_p line_selection,
|
||||||
const char* column_name,
|
const char* column_name,
|
||||||
@ -1083,7 +1309,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
|||||||
(column_to_clone->header)->indexer_name,
|
(column_to_clone->header)->indexer_name,
|
||||||
((column_to_clone->header)->associated_column).column_name,
|
((column_to_clone->header)->associated_column).column_name,
|
||||||
((column_to_clone->header)->associated_column).version,
|
((column_to_clone->header)->associated_column).version,
|
||||||
(column_to_clone->header)->comments
|
(column_to_clone->header)->comments,
|
||||||
|
true
|
||||||
);
|
);
|
||||||
|
|
||||||
if (new_column == NULL)
|
if (new_column == NULL)
|
||||||
@ -1097,6 +1324,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(new_column->header)->cloned_from = (column_to_clone->header)->version;
|
(new_column->header)->cloned_from = (column_to_clone->header)->version;
|
||||||
|
|
||||||
if (clone_data && (line_selection == NULL))
|
if (clone_data && (line_selection == NULL))
|
||||||
@ -1137,6 +1366,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int obi_close_column(OBIDMS_column_p column)
|
int obi_close_column(OBIDMS_column_p column)
|
||||||
{
|
{
|
||||||
int ret_val = 0;
|
int ret_val = 0;
|
||||||
@ -1185,6 +1415,7 @@ int obi_close_column(OBIDMS_column_p column)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int obi_clone_column_indexer(OBIDMS_column_p column)
|
int obi_clone_column_indexer(OBIDMS_column_p column)
|
||||||
{
|
{
|
||||||
char* new_indexer_name;
|
char* new_indexer_name;
|
||||||
@ -1208,6 +1439,7 @@ int obi_clone_column_indexer(OBIDMS_column_p column)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap/remap?
|
int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap/remap?
|
||||||
{
|
{
|
||||||
size_t file_size;
|
size_t file_size;
|
||||||
@ -1309,6 +1541,7 @@ int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int obi_enlarge_column(OBIDMS_column_p column)
|
int obi_enlarge_column(OBIDMS_column_p column)
|
||||||
{
|
{
|
||||||
size_t file_size;
|
size_t file_size;
|
||||||
@ -1363,7 +1596,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
header_size = (column->header)->header_size;
|
header_size = (column->header)->header_size;
|
||||||
file_size = header_size + new_data_size;
|
file_size = header_size + new_data_size;
|
||||||
|
|
||||||
// Enlarge the file // TODO isn't it possible that this makes the file "move"?
|
// Enlarge the file
|
||||||
if (ftruncate(column_file_descriptor, file_size) < 0)
|
if (ftruncate(column_file_descriptor, file_size) < 0)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
@ -1414,6 +1647,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void obi_ini_to_NA_values(OBIDMS_column_p column,
|
void obi_ini_to_NA_values(OBIDMS_column_p column,
|
||||||
index_t first_line_nb,
|
index_t first_line_nb,
|
||||||
index_t nb_lines)
|
index_t nb_lines)
|
||||||
@ -1479,6 +1713,7 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number)
|
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number)
|
||||||
{
|
{
|
||||||
OBIDMS_column_header_p header;
|
OBIDMS_column_header_p header;
|
||||||
@ -1562,6 +1797,7 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int obi_close_header(OBIDMS_column_header_p header)
|
int obi_close_header(OBIDMS_column_header_p header)
|
||||||
{
|
{
|
||||||
if (munmap(header, header->header_size) < 0)
|
if (munmap(header, header->header_size) < 0)
|
||||||
@ -1574,47 +1810,56 @@ int obi_close_header(OBIDMS_column_header_p header)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// TODO to be rewritten in an optimized and safe way if possible
|
|
||||||
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
|
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
|
||||||
{
|
{
|
||||||
char* elements_names;
|
int* elt_names_idx;
|
||||||
char* name;
|
|
||||||
index_t element_index;
|
|
||||||
|
|
||||||
elements_names = strdup((column->header)->elements_names);
|
elt_names_idx = bsearch_user_data(element_name, (column->header)->sorted_elements_idx, (column->header)->nb_elements_per_line, sizeof(int), column->header, cmp_elements_names_with_name_and_idx);
|
||||||
|
|
||||||
|
if (elt_names_idx != NULL)
|
||||||
|
return (index_t)(*elt_names_idx);
|
||||||
|
|
||||||
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
|
obidebug(1, "\nError: could not find element name %s", element_name);
|
||||||
|
return OBIIdx_NA;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// TODO doc, returns elements names with ; as separator (discuss maybe char**)
|
||||||
|
char* obi_get_elements_names(OBIDMS_column_p column)
|
||||||
|
{
|
||||||
|
char* elements_names;
|
||||||
|
int i, j;
|
||||||
|
int elt_idx;
|
||||||
|
int len;
|
||||||
|
|
||||||
|
elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
|
||||||
if (elements_names == NULL)
|
if (elements_names == NULL)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError strdup-ing the elements names");
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
return OBIIdx_NA;
|
obidebug(1, "\nError allocating memory for elements names");
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
element_index = 0;
|
j = 0;
|
||||||
|
for (i=0; i < (column->header)->nb_elements_per_line; i++)
|
||||||
name = strtok(elements_names, ";"); // TODO not thread safe, see strtok_r maybe
|
|
||||||
if (strcmp(element_name, name) == 0)
|
|
||||||
{
|
{
|
||||||
free(elements_names);
|
elt_idx = ((column->header)->elements_names_idx)[i];
|
||||||
return element_index;
|
len = strlen(((column->header)->elements_names)+elt_idx);
|
||||||
}
|
memcpy(elements_names+j, ((column->header)->elements_names)+elt_idx, len*sizeof(char));
|
||||||
element_index++;
|
j = j + len;
|
||||||
|
elements_names[j] = ';';
|
||||||
while (name != NULL)
|
j++;
|
||||||
{
|
|
||||||
name = strtok(NULL, ";"); // TODO not thread safe, see strtok_r maybe
|
|
||||||
if (strcmp(element_name, name) == 0)
|
|
||||||
{
|
|
||||||
free(elements_names);
|
|
||||||
return element_index;
|
|
||||||
}
|
|
||||||
element_index++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
obidebug(1, "\nCan't find an element name");
|
elements_names[j - 1] = '\0';
|
||||||
free(elements_names);
|
|
||||||
return OBIIdx_NA;
|
return elements_names;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
|
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
|
||||||
{
|
{
|
||||||
// Check if the column is read-only
|
// Check if the column is read-only
|
||||||
@ -1649,6 +1894,7 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
|
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
|
||||||
{
|
{
|
||||||
if ((line_nb+1) > ((column->header)->line_count))
|
if ((line_nb+1) > ((column->header)->line_count))
|
||||||
|
@ -31,7 +31,7 @@
|
|||||||
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
|
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
|
||||||
*/
|
*/
|
||||||
#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
|
#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
|
||||||
* are used ("0;1;2;...;n"), considering ELEMENTS_NAMES_MAX.
|
* are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX.
|
||||||
*/
|
*/
|
||||||
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
|
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
|
||||||
*/
|
*/
|
||||||
@ -39,6 +39,10 @@
|
|||||||
*/
|
*/
|
||||||
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
|
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
|
||||||
*/
|
*/
|
||||||
|
#define FORMATTED_ELT_NAMES_SEPARATOR '\0' /**< The separator between the elements names once they are formatted (as stored in the column header).
|
||||||
|
*/
|
||||||
|
#define NOT_FORMATTED_ELT_NAMES_SEPARATOR ';' /**< The separator between the elements names when they are not formatted.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -66,9 +70,15 @@ typedef struct OBIDMS_column_header {
|
|||||||
*/
|
*/
|
||||||
index_t nb_elements_per_line; /**< Number of elements per line.
|
index_t nb_elements_per_line; /**< Number of elements per line.
|
||||||
*/
|
*/
|
||||||
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with ';' as separator
|
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with '\0' as separator
|
||||||
* (no terminal ';').
|
* and '\0\0' as terminal flag.
|
||||||
* (default are the indices: "0;1;2;...;n").
|
* (default are the indices: "0\01\02\0...\0n\0\0").
|
||||||
|
*/
|
||||||
|
int elements_names_length; /**< Length of the character array where the elements names are stored.
|
||||||
|
*/
|
||||||
|
int elements_names_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the start of each element name in elements_names.
|
||||||
|
*/
|
||||||
|
int sorted_elements_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the sorted element names in elements_names_idx.
|
||||||
*/
|
*/
|
||||||
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
|
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
|
||||||
* element from the column.
|
* element from the column.
|
||||||
@ -184,12 +194,13 @@ size_t obi_get_platform_header_size();
|
|||||||
* @param nb_lines The number of lines to be stored.
|
* @param nb_lines The number of lines to be stored.
|
||||||
* @param nb_elements_per_line The number of elements per line. // TODO talk about default values
|
* @param nb_elements_per_line The number of elements per line. // TODO talk about default values
|
||||||
* @param elements_names The names of the elements with ';' as separator (no terminal ';'),
|
* @param elements_names The names of the elements with ';' as separator (no terminal ';'),
|
||||||
* NULL or "" if the default names are to be used ("0;1;2;...;n").
|
* NULL or "" if the default names are to be used ("0\01\02\0...\0n").
|
||||||
* @param indexer_name The name of the indexer if there is one associated with the column.
|
* @param indexer_name The name of the indexer if there is one associated with the column.
|
||||||
* If NULL or "", the indexer name is set as the column name.
|
* If NULL or "", the indexer name is set as the column name.
|
||||||
* @param associated_column_name The name of the associated column if there is one.
|
* @param associated_column_name The name of the associated column if there is one.
|
||||||
* @param associated_column_version The version of the associated column if there is one.
|
* @param associated_column_version The version of the associated column if there is one.
|
||||||
* @param comments Optional comments associated with the column.
|
* @param comments Optional comments associated with the column.
|
||||||
|
* @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()).
|
||||||
*
|
*
|
||||||
* @returns A pointer on the newly created column structure.
|
* @returns A pointer on the newly created column structure.
|
||||||
* @retval NULL if an error occurred.
|
* @retval NULL if an error occurred.
|
||||||
@ -206,7 +217,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
const char* indexer_name,
|
const char* indexer_name,
|
||||||
const char* associated_column_name,
|
const char* associated_column_name,
|
||||||
obiversion_t associated_column_version,
|
obiversion_t associated_column_version,
|
||||||
const char* comments
|
const char* comments,
|
||||||
|
bool elt_names_formatted
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
@ -353,7 +365,7 @@ int obi_close_header(OBIDMS_column_header_p header);
|
|||||||
* @param element_name The name of the element.
|
* @param element_name The name of the element.
|
||||||
*
|
*
|
||||||
* @returns The index of the element in a line of the column.
|
* @returns The index of the element in a line of the column.
|
||||||
* @retval OBIIdx_NA if an error occurred. // TODO not sure if this is "clean".
|
* @retval OBIIdx_NA if an error occurred.
|
||||||
*
|
*
|
||||||
* @since July 2015
|
* @since July 2015
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
@ -361,6 +373,22 @@ int obi_close_header(OBIDMS_column_header_p header);
|
|||||||
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
|
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Recovers the elements names of the lines of a column, with ';' as separator (i.e. "0;1;2;...;n\0").
|
||||||
|
*
|
||||||
|
* @warning The returned pointer has to be freed by the caller.
|
||||||
|
*
|
||||||
|
* @param column A pointer on an OBIDMS column.
|
||||||
|
*
|
||||||
|
* @returns A pointer on a character array where the elements names are stored.
|
||||||
|
* @retval NULL if an error occurred.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
char* obi_get_elements_names(OBIDMS_column_p column);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Prepares a column to set a value.
|
* @brief Prepares a column to set a value.
|
||||||
*
|
*
|
||||||
|
525
src/obiview.c
525
src/obiview.c
@ -47,7 +47,7 @@
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal function building the file name where the informations about an obiview are stored.
|
* Internal function building the file name where the information about a finished, read-only obiview is stored.
|
||||||
*
|
*
|
||||||
* @warning The returned pointer has to be freed by the caller.
|
* @warning The returned pointer has to be freed by the caller.
|
||||||
*
|
*
|
||||||
@ -63,7 +63,23 @@ static char* build_obiview_file_name(const char* view_name);
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal function checking if a view with a given name already exists in a DMS.
|
* Internal function building the file name where the information about an unfinished, writable obiview is stored.
|
||||||
|
*
|
||||||
|
* @warning The returned pointer has to be freed by the caller.
|
||||||
|
*
|
||||||
|
* @param view_name The name of the view.
|
||||||
|
*
|
||||||
|
* @returns A pointer to the file name.
|
||||||
|
* @retval NULL if an error occurred.
|
||||||
|
*
|
||||||
|
* @since February 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static char* build_unfinished_obiview_file_name(const char* view_name);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal function checking if a view (either finished or unfinished) with a given name already exists in a DMS.
|
||||||
*
|
*
|
||||||
* @param dms The DMS.
|
* @param dms The DMS.
|
||||||
* @param view_name The name of the view.
|
* @param view_name The name of the view.
|
||||||
@ -73,7 +89,7 @@ static char* build_obiview_file_name(const char* view_name);
|
|||||||
* @since September 2016
|
* @since September 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
bool view_exists(OBIDMS_p dms, const char* view_name);
|
static bool view_exists(OBIDMS_p dms, const char* view_name);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -84,7 +100,7 @@ bool view_exists(OBIDMS_p dms, const char* view_name);
|
|||||||
* @since June 2016
|
* @since June 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
size_t get_platform_view_file_size();
|
static size_t get_platform_view_file_size();
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -99,7 +115,7 @@ size_t get_platform_view_file_size();
|
|||||||
* @since August 2016
|
* @since August 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int enlarge_view_file(Obiview_p view, size_t new_size);
|
static int enlarge_view_file(Obiview_p view, size_t new_size);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -117,7 +133,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size);
|
|||||||
* @since August 2016
|
* @since August 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int write_comments_to_view_file(Obiview_p view, const char* comments);
|
static int write_comments_to_view_file(Obiview_p view, const char* comments);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -134,7 +150,7 @@ int write_comments_to_view_file(Obiview_p view, const char* comments);
|
|||||||
* @since June 2016
|
* @since June 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int create_obiview_file(OBIDMS_p dms, const char* view_name);
|
static int create_obiview_file(OBIDMS_p dms, const char* view_name);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -156,7 +172,7 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name);
|
|||||||
* @since June 2016
|
* @since June 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
void update_column_refs(Obiview_p view);
|
static void update_column_refs(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -175,7 +191,7 @@ void update_column_refs(Obiview_p view);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int create_column_dict(Obiview_p view);
|
static int create_column_dict(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -194,7 +210,7 @@ int create_column_dict(Obiview_p view);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int update_column_dict(Obiview_p view);
|
static int update_column_dict(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -219,7 +235,7 @@ int update_column_dict(Obiview_p view);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int update_column_refs_and_dict(Obiview_p view);
|
static int update_column_refs_and_dict(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -239,7 +255,7 @@ int update_column_refs_and_dict(Obiview_p view);
|
|||||||
* @since February 2016
|
* @since February 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int update_lines(Obiview_p view, index_t line_count);
|
static int update_lines(Obiview_p view, index_t line_count);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -257,7 +273,71 @@ int update_lines(Obiview_p view, index_t line_count);
|
|||||||
* @since February 2016
|
* @since February 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Saves a view, updating its information in the view file.
|
||||||
|
*
|
||||||
|
* @warning The view must be writable.
|
||||||
|
*
|
||||||
|
* @param view A pointer on the view.
|
||||||
|
*
|
||||||
|
* @returns A value indicating the success of the operation.
|
||||||
|
* @retval 0 if the operation was successfully completed.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since February 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static int save_view(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Renames a view file once the view is finished, replacing the '*.obiview_unfinished' extension with '*.obiview'.
|
||||||
|
*
|
||||||
|
* @param view A pointer on the view.
|
||||||
|
*
|
||||||
|
* @returns A value indicating the success of the operation.
|
||||||
|
* @retval 0 if the operation was successfully completed.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since February 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static int rename_finished_view(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Finishes a view: checks the predicates, saves all the information, renames the view file.
|
||||||
|
*
|
||||||
|
* @param view A pointer on the view.
|
||||||
|
*
|
||||||
|
* @returns A value indicating the success of the operation.
|
||||||
|
* @retval 0 if the operation was successfully completed.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since February 2017
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static int finish_view(Obiview_p view);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Closes an opened view.
|
||||||
|
*
|
||||||
|
* @warning Doesn't save the view.
|
||||||
|
*
|
||||||
|
* @param view A pointer on the view.
|
||||||
|
*
|
||||||
|
* @returns A value indicating the success of the operation.
|
||||||
|
* @retval 0 if the operation was successfully completed.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @see obi_save_and_close_view()
|
||||||
|
* @since February 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static int close_view(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -276,7 +356,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
|||||||
* @since April 2016
|
* @since April 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p);
|
static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -294,7 +374,7 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
|
|||||||
* @since April 2016
|
* @since April 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
|
static int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
|
||||||
|
|
||||||
|
|
||||||
/****** PREDICATE FUNCTIONS *******/
|
/****** PREDICATE FUNCTIONS *******/
|
||||||
@ -313,7 +393,7 @@ int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
char* view_has_nuc_sequence_column(Obiview_p view);
|
static char* view_has_nuc_sequence_column(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -330,7 +410,7 @@ char* view_has_nuc_sequence_column(Obiview_p view);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
char* view_has_quality_column(Obiview_p view);
|
static char* view_has_quality_column(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -347,7 +427,7 @@ char* view_has_quality_column(Obiview_p view);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
char* view_has_id_column(Obiview_p view);
|
static char* view_has_id_column(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -364,7 +444,7 @@ char* view_has_id_column(Obiview_p view);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
char* view_has_definition_column(Obiview_p view);
|
static char* view_has_definition_column(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -381,7 +461,7 @@ char* view_has_definition_column(Obiview_p view);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
char* view_check_qual_match_seqs(Obiview_p view);
|
static char* view_check_qual_match_seqs(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -396,7 +476,7 @@ char* view_check_qual_match_seqs(Obiview_p view);
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view));
|
static char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view));
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -410,7 +490,7 @@ char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obivi
|
|||||||
* @since July 2016
|
* @since July 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
char* view_check_all_predicates(Obiview_p view);
|
static char* view_check_all_predicates(Obiview_p view);
|
||||||
|
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
@ -443,11 +523,35 @@ static char* build_obiview_file_name(const char* view_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool view_exists(OBIDMS_p dms, const char* view_name)
|
static char* build_unfinished_obiview_file_name(const char* view_name)
|
||||||
|
{
|
||||||
|
char* file_name;
|
||||||
|
|
||||||
|
// Build file name
|
||||||
|
file_name = (char*) malloc((strlen(view_name) + 19 + 1)*sizeof(char));
|
||||||
|
if (file_name == NULL)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
|
obidebug(1, "\nError allocating memory for a view file name");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (sprintf(file_name, "%s.obiview_unfinished", view_name) < 0)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBIVIEW_ERROR);
|
||||||
|
obidebug(1, "\nProblem building an unfinished obiview file name");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return file_name;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool view_exists(OBIDMS_p dms, const char* view_name)
|
||||||
{
|
{
|
||||||
struct dirent* dp;
|
struct dirent* dp;
|
||||||
char* file_name;
|
char* file_name;
|
||||||
|
|
||||||
|
// Check finished views
|
||||||
// Create file name
|
// Create file name
|
||||||
file_name = build_obiview_file_name(view_name);
|
file_name = build_obiview_file_name(view_name);
|
||||||
if (file_name == NULL)
|
if (file_name == NULL)
|
||||||
@ -458,13 +562,38 @@ bool view_exists(OBIDMS_p dms, const char* view_name)
|
|||||||
if ((dp->d_name)[0] == '.')
|
if ((dp->d_name)[0] == '.')
|
||||||
continue;
|
continue;
|
||||||
if (strcmp(dp->d_name, file_name) == 0)
|
if (strcmp(dp->d_name, file_name) == 0)
|
||||||
|
{
|
||||||
|
free(file_name);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(file_name);
|
||||||
|
|
||||||
|
// Check unfinished views
|
||||||
|
// Create file name
|
||||||
|
file_name = build_unfinished_obiview_file_name(view_name);
|
||||||
|
if (file_name == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
while ((dp = readdir(dms->view_directory)) != NULL)
|
||||||
|
{
|
||||||
|
if ((dp->d_name)[0] == '.')
|
||||||
|
continue;
|
||||||
|
if (strcmp(dp->d_name, file_name) == 0)
|
||||||
|
{
|
||||||
|
free(file_name);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(file_name);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t get_platform_view_file_size()
|
static size_t get_platform_view_file_size()
|
||||||
{
|
{
|
||||||
size_t obiview_size;
|
size_t obiview_size;
|
||||||
size_t rounded_obiview_size;
|
size_t rounded_obiview_size;
|
||||||
@ -480,7 +609,7 @@ size_t get_platform_view_file_size()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int enlarge_view_file(Obiview_p view, size_t new_size)
|
static int enlarge_view_file(Obiview_p view, size_t new_size)
|
||||||
{
|
{
|
||||||
int obiview_file_descriptor;
|
int obiview_file_descriptor;
|
||||||
double multiple;
|
double multiple;
|
||||||
@ -488,7 +617,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size)
|
|||||||
char* file_name;
|
char* file_name;
|
||||||
|
|
||||||
// Create file name
|
// Create file name
|
||||||
file_name = build_obiview_file_name((view->infos)->name);
|
file_name = build_unfinished_obiview_file_name((view->infos)->name);
|
||||||
if (file_name == NULL)
|
if (file_name == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@ -556,7 +685,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int write_comments_to_view_file(Obiview_p view, const char* comments)
|
static int write_comments_to_view_file(Obiview_p view, const char* comments)
|
||||||
{
|
{
|
||||||
size_t new_size;
|
size_t new_size;
|
||||||
|
|
||||||
@ -580,14 +709,14 @@ int write_comments_to_view_file(Obiview_p view, const char* comments)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int create_obiview_file(OBIDMS_p dms, const char* view_name)
|
static int create_obiview_file(OBIDMS_p dms, const char* view_name)
|
||||||
{
|
{
|
||||||
char* file_name;
|
char* file_name;
|
||||||
int obiview_file_descriptor;
|
int obiview_file_descriptor;
|
||||||
size_t file_size;
|
size_t file_size;
|
||||||
|
|
||||||
// Create file name
|
// Create file name
|
||||||
file_name = build_obiview_file_name(view_name);
|
file_name = build_unfinished_obiview_file_name(view_name);
|
||||||
if (file_name == NULL)
|
if (file_name == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@ -634,7 +763,7 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void update_column_refs(Obiview_p view)
|
static void update_column_refs(Obiview_p view)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -646,7 +775,7 @@ void update_column_refs(Obiview_p view)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int create_column_dict(Obiview_p view)
|
static int create_column_dict(Obiview_p view)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -681,7 +810,7 @@ int create_column_dict(Obiview_p view)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int update_column_dict(Obiview_p view)
|
static int update_column_dict(Obiview_p view)
|
||||||
{
|
{
|
||||||
// Re-initialize the dictionary to rebuild it from scratch
|
// Re-initialize the dictionary to rebuild it from scratch
|
||||||
ht_free(view->column_dict);
|
ht_free(view->column_dict);
|
||||||
@ -693,14 +822,14 @@ int update_column_dict(Obiview_p view)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int update_column_refs_and_dict(Obiview_p view)
|
static int update_column_refs_and_dict(Obiview_p view)
|
||||||
{
|
{
|
||||||
update_column_refs(view);
|
update_column_refs(view);
|
||||||
return update_column_dict(view);
|
return update_column_dict(view);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int update_lines(Obiview_p view, index_t line_count)
|
static int update_lines(Obiview_p view, index_t line_count)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -739,7 +868,7 @@ int update_lines(Obiview_p view, index_t line_count)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
OBIDMS_column_p column = NULL;
|
OBIDMS_column_p column = NULL;
|
||||||
@ -799,7 +928,161 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
|
/**
 * @brief Internal function storing the line selection reference and the column
 *        references of a view in its infos structure.
 *
 * @warning The view must be writable.
 *
 * @param view A pointer on the view.
 *
 * @returns A value indicating the success of the operation.
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if the view is read-only.
 */
static int save_view(Obiview_p view)
{
    // A read-only view can not be saved
    if (view->read_only)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError trying to save a read-only view");
        return -1;
    }

    if (view->line_selection == NULL)
    {
        // No line selection: reset the stored reference.
        // Necessary because the line selection could have been deleted if a column was cloned
        (((view->infos)->line_selection).column_name)[0] = '\0';
        ((view->infos)->line_selection).version = -1;
        (view->infos)->all_lines = true;
    }
    else
    {
        // Store the name and version of the line selection column.
        // Unnecessary in theory, the line selection references are already saved
        strcpy(((view->infos)->line_selection).column_name, ((view->line_selection)->header)->name);
        ((view->infos)->line_selection).version = ((view->line_selection)->header)->version;
        (view->infos)->all_lines = false;
    }

    // Refresh the column references kept in the view infos
    update_column_refs(view);

    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Internal function renaming the file of a view from its unfinished
 *        file name to its finished file name.
 *
 * Both names are built from the view name, prefixed with "VIEWS/" and resolved
 * with obi_dms_get_full_path() before calling rename().
 *
 * @param view A pointer on the view.
 *
 * @returns A value indicating the success of the operation.
 * @retval 0 if the file was successfully renamed.
 * @retval -1 if an error occurred (name building, allocation, path too long
 *            or rename() failure).
 */
static int rename_finished_view(Obiview_p view)
{
    static const char views_dir[] = "VIEWS/";

    int   ret_value          = -1;
    char* old_name           = NULL;
    char* new_name           = NULL;
    char* path_old_name      = NULL;
    char* path_new_name      = NULL;
    char* full_path_old_name = NULL;
    char* full_path_new_name = NULL;

    old_name = build_unfinished_obiview_file_name((view->infos)->name);
    new_name = build_obiview_file_name((view->infos)->name);
    if ((old_name == NULL) || (new_name == NULL))
        goto cleanup;

    // Make sure the relative paths fit in the buffers (terminating '\0' included)
    if (((strlen(views_dir) + strlen(old_name)) >= (size_t) MAX_PATH_LEN) ||
        ((strlen(views_dir) + strlen(new_name)) >= (size_t) MAX_PATH_LEN))
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError renaming the file of a finished view: path too long");
        goto cleanup;
    }

    path_old_name = malloc(MAX_PATH_LEN);
    path_new_name = malloc(MAX_PATH_LEN);
    if ((path_old_name == NULL) || (path_new_name == NULL))
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError allocating memory for the paths when renaming the file of a finished view");
        goto cleanup;
    }

    strcpy(path_old_name, views_dir);
    strcat(path_old_name, old_name);

    strcpy(path_new_name, views_dir);
    strcat(path_new_name, new_name);

    full_path_old_name = obi_dms_get_full_path(view->dms, path_old_name);
    full_path_new_name = obi_dms_get_full_path(view->dms, path_new_name);
    if ((full_path_old_name == NULL) || (full_path_new_name == NULL))
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError getting the full paths when renaming the file of a finished view");
        goto cleanup;
    }

    if (rename(full_path_old_name, full_path_new_name) < 0)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError renaming the file of a finished view: %s", full_path_new_name);
        goto cleanup;
    }

    ret_value = 0;

cleanup:
    // Single exit point so that every buffer is released on all paths (free(NULL) is a no-op)
    free(old_name);
    free(new_name);
    free(path_old_name);
    free(path_new_name);
    free(full_path_old_name);
    free(full_path_new_name);

    return ret_value;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Internal function finishing a view: checks its predicates, writes them
 *        as comments in the view file, saves the view, renames its file to the
 *        finished file name and flags the view as finished.
 *
 * @warning The view must be writable.
 *
 * @param view A pointer on the view.
 *
 * @returns A value indicating the success of the operation.
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if the view is read-only, if the predicates are not respected,
 *            or if writing the predicates, saving or renaming failed.
 */
static int finish_view(Obiview_p view)
{
    char* predicates;

    // Check that the view is not read-only
    if (view->read_only)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError trying to save a read-only view");
        return -1;
    }

    // Check predicates
    predicates = view_check_all_predicates(view);
    if (predicates == NULL)
    {
        obidebug(1, "\nView predicates not respected");
        return -1;  // TODO reverse view (delete files)
    }

    // Write the predicates in the view file; do not finish the view if they could not be written
    if (write_comments_to_view_file(view, predicates) < 0)
    {
        obidebug(1, "\nError writing the predicates when finishing a view");
        free(predicates);
        return -1;
    }
    free(predicates);

    if (save_view(view) < 0)
        return -1;

    if (rename_finished_view(view) < 0)
        return -1;

    // Flag the view as finished
    (view->infos)->finished = true;

    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Internal function closing a view: closes its columns and its line
 *        selection if there is one, frees the column dictionary, unmaps the
 *        view file and frees the view structure.
 *
 * Errors do not stop the closing: every step is attempted and the failure is
 * reported through the returned value.
 *
 * @param view A pointer on the view.
 *
 * @returns A value indicating the success of the operation.
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if at least one step failed.
 */
static int close_view(Obiview_p view)
{
    int col_idx;
    int status = 0;

    // Close every column of the view
    for (col_idx = 0; col_idx < ((view->infos)->column_count); col_idx++)
    {
        if (obi_close_column((view->columns)[col_idx]) < 0)
        {
            obidebug(1, "\nError closing a column while closing a view");
            status = -1;
        }
    }

    // Close line selection if there is one
    if ((view->line_selection != NULL) && (obi_close_column(view->line_selection) < 0))
    {
        obidebug(1, "\nError closing a line selection while closing a view");
        status = -1;
    }

    // Free the column dictionary
    ht_free(view->column_dict);

    // Unmap view file
    if (obi_view_unmap_file(view->dms, view->infos) < 0)
    {
        obidebug(1, "\nError unmaping a view file while closing a view");
        status = -1;
    }

    free(view);

    return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
char* column_name = NULL;
|
char* column_name = NULL;
|
||||||
@ -846,7 +1129,7 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
|
static int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
|
||||||
{
|
{
|
||||||
if (((*line_nb_p)+1) > ((view->infos)->line_count))
|
if (((*line_nb_p)+1) > ((view->infos)->line_count))
|
||||||
{
|
{
|
||||||
@ -865,7 +1148,7 @@ int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
|
|||||||
|
|
||||||
/****** PREDICATE FUNCTIONS *******/
|
/****** PREDICATE FUNCTIONS *******/
|
||||||
|
|
||||||
char* view_has_nuc_sequence_column(Obiview_p view)
|
static char* view_has_nuc_sequence_column(Obiview_p view)
|
||||||
{
|
{
|
||||||
char* predicate;
|
char* predicate;
|
||||||
|
|
||||||
@ -889,7 +1172,7 @@ char* view_has_nuc_sequence_column(Obiview_p view)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char* view_has_quality_column(Obiview_p view)
|
static char* view_has_quality_column(Obiview_p view)
|
||||||
{
|
{
|
||||||
char* predicate;
|
char* predicate;
|
||||||
|
|
||||||
@ -913,7 +1196,7 @@ char* view_has_quality_column(Obiview_p view)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char* view_has_id_column(Obiview_p view)
|
static char* view_has_id_column(Obiview_p view)
|
||||||
{
|
{
|
||||||
char* predicate;
|
char* predicate;
|
||||||
|
|
||||||
@ -936,7 +1219,8 @@ char* view_has_id_column(Obiview_p view)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
char* view_has_definition_column(Obiview_p view)
|
|
||||||
|
static char* view_has_definition_column(Obiview_p view)
|
||||||
{
|
{
|
||||||
char* predicate;
|
char* predicate;
|
||||||
|
|
||||||
@ -960,7 +1244,7 @@ char* view_has_definition_column(Obiview_p view)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char* view_check_qual_match_seqs(Obiview_p view)
|
static char* view_check_qual_match_seqs(Obiview_p view)
|
||||||
{
|
{
|
||||||
index_t i, j, k;
|
index_t i, j, k;
|
||||||
index_t nb_elements_per_line;
|
index_t nb_elements_per_line;
|
||||||
@ -1053,13 +1337,13 @@ char* view_check_qual_match_seqs(Obiview_p view)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view))
|
static char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view))
|
||||||
{
|
{
|
||||||
return predicate_function(view);
|
return predicate_function(view);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char* view_check_all_predicates(Obiview_p view)
|
static char* view_check_all_predicates(Obiview_p view)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
size_t size_to_allocate;
|
size_t size_to_allocate;
|
||||||
@ -1195,7 +1479,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Map view file
|
// Map view file
|
||||||
view->infos = obi_view_map_file(dms, view_name);
|
view->infos = obi_view_map_file(dms, view_name, false);
|
||||||
if (view->infos == NULL)
|
if (view->infos == NULL)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError mapping the informations of a new view");
|
obidebug(1, "\nError mapping the informations of a new view");
|
||||||
@ -1236,7 +1520,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
|||||||
// If there is a new line selection, build it by combining it with the one from the view to clone if there is one
|
// If there is a new line selection, build it by combining it with the one from the view to clone if there is one
|
||||||
else if (line_selection != NULL)
|
else if (line_selection != NULL)
|
||||||
{
|
{
|
||||||
view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL);
|
view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL, false);
|
||||||
if ((view->line_selection) == NULL)
|
if ((view->line_selection) == NULL)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating a column corresponding to a line selection");
|
obidebug(1, "\nError creating a column corresponding to a line selection");
|
||||||
@ -1305,7 +1589,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
|||||||
if (write_comments_to_view_file(view, clone_comment) < 0)
|
if (write_comments_to_view_file(view, clone_comment) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError writing comments when creating a view");
|
obidebug(1, "\nError writing comments when creating a view");
|
||||||
obi_close_view(view);
|
close_view(view);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1341,7 +1625,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
|||||||
if (write_comments_to_view_file(view, comments) < 0)
|
if (write_comments_to_view_file(view, comments) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError writing comments when creating a view");
|
obidebug(1, "\nError writing comments when creating a view");
|
||||||
obi_close_view(view);
|
close_view(view);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1360,7 +1644,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
|||||||
// Create the column dictionary (hash table) associating column names (or aliases) to column pointers
|
// Create the column dictionary (hash table) associating column names (or aliases) to column pointers
|
||||||
if (create_column_dict(view) < 0)
|
if (create_column_dict(view) < 0)
|
||||||
{
|
{
|
||||||
obi_close_view(view);
|
close_view(view);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1409,7 +1693,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
|
|||||||
return NULL;
|
return NULL;
|
||||||
view = obi_new_view(dms, view_name, view_to_clone, line_selection, comments);
|
view = obi_new_view(dms, view_name, view_to_clone, line_selection, comments);
|
||||||
|
|
||||||
obi_close_view(view_to_clone);
|
close_view(view_to_clone);
|
||||||
|
|
||||||
return view;
|
return view;
|
||||||
}
|
}
|
||||||
@ -1511,26 +1795,43 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
|
|||||||
return NULL;
|
return NULL;
|
||||||
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column);
|
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column);
|
||||||
|
|
||||||
obi_close_view(view_to_clone);
|
close_view(view_to_clone);
|
||||||
|
|
||||||
return view;
|
return view;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
|
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bool finished)
|
||||||
{
|
{
|
||||||
char* file_name;
|
char* file_name;
|
||||||
Obiview_infos_p view_infos;
|
Obiview_infos_p view_infos;
|
||||||
int obiview_file_descriptor;
|
int obiview_file_descriptor;
|
||||||
size_t file_size;
|
size_t file_size;
|
||||||
|
int open_flag;
|
||||||
|
int mmap_flag;
|
||||||
|
|
||||||
// Create file name
|
// Create file name
|
||||||
|
if (finished)
|
||||||
file_name = build_obiview_file_name(view_name);
|
file_name = build_obiview_file_name(view_name);
|
||||||
|
else
|
||||||
|
file_name = build_unfinished_obiview_file_name(view_name);
|
||||||
if (file_name == NULL)
|
if (file_name == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
// Set flags (read-only or not)
|
||||||
|
if (finished)
|
||||||
|
{
|
||||||
|
open_flag = O_RDONLY;
|
||||||
|
mmap_flag = PROT_READ;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
open_flag = O_RDWR;
|
||||||
|
mmap_flag = PROT_READ | PROT_WRITE;
|
||||||
|
}
|
||||||
|
|
||||||
// Open view file
|
// Open view file
|
||||||
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDWR, 0777);
|
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, open_flag, 0777);
|
||||||
if (obiview_file_descriptor < 0)
|
if (obiview_file_descriptor < 0)
|
||||||
{
|
{
|
||||||
if (errno == ENOENT)
|
if (errno == ENOENT)
|
||||||
@ -1560,7 +1861,7 @@ Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
|
|||||||
// Map the view infos structure
|
// Map the view infos structure
|
||||||
view_infos = mmap(NULL,
|
view_infos = mmap(NULL,
|
||||||
file_size,
|
file_size,
|
||||||
PROT_READ | PROT_WRITE,
|
mmap_flag,
|
||||||
MAP_SHARED,
|
MAP_SHARED,
|
||||||
obiview_file_descriptor,
|
obiview_file_descriptor,
|
||||||
0
|
0
|
||||||
@ -1590,12 +1891,15 @@ int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
|
|||||||
size_t file_size;
|
size_t file_size;
|
||||||
|
|
||||||
// Get file name
|
// Get file name
|
||||||
|
if (view_infos->finished)
|
||||||
file_name = build_obiview_file_name(view_infos->name);
|
file_name = build_obiview_file_name(view_infos->name);
|
||||||
|
else
|
||||||
|
file_name = build_unfinished_obiview_file_name(view_infos->name);
|
||||||
if (file_name == NULL)
|
if (file_name == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
// Open view file
|
// Open view file
|
||||||
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDWR, 0777);
|
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDONLY, 0777);
|
||||||
if (obiview_file_descriptor < 0)
|
if (obiview_file_descriptor < 0)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBIVIEW_ERROR);
|
obi_set_errno(OBIVIEW_ERROR);
|
||||||
@ -1661,13 +1965,9 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Map view file
|
// Map view file
|
||||||
view->infos = obi_view_map_file(dms, view_name);
|
view->infos = obi_view_map_file(dms, view_name, true);
|
||||||
|
if ((view->infos) == NULL)
|
||||||
// Check that the view is finished and can be opened
|
|
||||||
if ((view->infos)->finished == false)
|
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError opening a view: the view is not finished");
|
|
||||||
obi_view_unmap_file(view->dms, view->infos);
|
|
||||||
free(view);
|
free(view);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -1697,7 +1997,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
|||||||
if (column_pointer == NULL)
|
if (column_pointer == NULL)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, column_name, column_version);
|
obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, column_name, column_version);
|
||||||
obi_close_view(view);
|
close_view(view);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
(view->columns)[i] = column_pointer;
|
(view->columns)[i] = column_pointer;
|
||||||
@ -1713,7 +2013,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
|||||||
if (create_column_dict(view) < 0)
|
if (create_column_dict(view) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column dictionary when opening a view");
|
obidebug(1, "\nError creating the column dictionary when opening a view");
|
||||||
obi_close_view(view);
|
close_view(view);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1792,7 +2092,7 @@ int obi_view_add_column(Obiview_p view,
|
|||||||
// Open or create the column
|
// Open or create the column
|
||||||
if (create)
|
if (create)
|
||||||
{ // Create column
|
{ // Create column
|
||||||
column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, indexer_name, associated_column_name, associated_column_version, comments);
|
column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, indexer_name, associated_column_name, associated_column_version, comments, false);
|
||||||
if (column == NULL)
|
if (column == NULL)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating a column to add to a view");
|
obidebug(1, "\nError creating a column to add to a view");
|
||||||
@ -1963,103 +2263,16 @@ int obi_view_create_column_alias(Obiview_p view, const char* current_name, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int obi_save_view(Obiview_p view)
|
|
||||||
{
|
|
||||||
// Check that the view is not read-only
|
|
||||||
if (view->read_only)
|
|
||||||
{
|
|
||||||
obi_set_errno(OBIVIEW_ERROR);
|
|
||||||
obidebug(1, "\nError trying to save a read-only view");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store reference for the line selection associated with that view if there is one
|
|
||||||
if (view->line_selection != NULL) // Unnecessary in theory, the line selection references are already saved
|
|
||||||
{
|
|
||||||
strcpy(((view->infos)->line_selection).column_name, ((view->line_selection)->header)->name);
|
|
||||||
((view->infos)->line_selection).version = ((view->line_selection)->header)->version;
|
|
||||||
(view->infos)->all_lines = false;
|
|
||||||
}
|
|
||||||
else // Necessary because line selection could have been deleted if a column was cloned
|
|
||||||
{
|
|
||||||
(((view->infos)->line_selection).column_name)[0] = '\0';
|
|
||||||
((view->infos)->line_selection).version = -1;
|
|
||||||
(view->infos)->all_lines = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
update_column_refs(view);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int obi_close_view(Obiview_p view)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
int ret_value;
|
|
||||||
|
|
||||||
ret_value = 0;
|
|
||||||
|
|
||||||
for (i=0; i < ((view->infos)->column_count); i++)
|
|
||||||
{
|
|
||||||
if (obi_close_column((view->columns)[i]) < 0)
|
|
||||||
{
|
|
||||||
obidebug(1, "\nError closing a column while closing a view");
|
|
||||||
ret_value = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close line selection if there is one
|
|
||||||
if (view->line_selection != NULL)
|
|
||||||
{
|
|
||||||
if (obi_close_column(view->line_selection) < 0)
|
|
||||||
{
|
|
||||||
obidebug(1, "\nError closing a line selection while closing a view");
|
|
||||||
ret_value = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flag the view as finished
|
|
||||||
(view->infos)->finished = true;
|
|
||||||
|
|
||||||
// Free the column dictionary
|
|
||||||
ht_free(view->column_dict);
|
|
||||||
|
|
||||||
// Unmap view file
|
|
||||||
if (obi_view_unmap_file(view->dms, view->infos) < 0)
|
|
||||||
{
|
|
||||||
obidebug(1, "\nError unmaping a view file while closing a view");
|
|
||||||
ret_value = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
free(view);
|
|
||||||
|
|
||||||
return ret_value;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int obi_save_and_close_view(Obiview_p view)
|
int obi_save_and_close_view(Obiview_p view)
|
||||||
{
|
{
|
||||||
char* predicates;
|
// Finish and save the view if it is not read-only
|
||||||
|
|
||||||
if ( ! (view->read_only))
|
if ( ! (view->read_only))
|
||||||
{
|
if (finish_view(view) < 0)
|
||||||
predicates = view_check_all_predicates(view);
|
|
||||||
if (predicates == NULL)
|
|
||||||
{
|
|
||||||
obidebug(1, "\nView predicates not respected");
|
|
||||||
return -1; // TODO reverse view (delete files)
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
write_comments_to_view_file(view, predicates);
|
|
||||||
free(predicates);
|
|
||||||
}
|
|
||||||
if (obi_save_view(view) < 0)
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
|
||||||
if (obi_close_view(view) < 0)
|
if (close_view(view) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -242,6 +242,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
|
|||||||
*
|
*
|
||||||
* @param dms A pointer on the OBIDMS.
|
* @param dms A pointer on the OBIDMS.
|
||||||
* @param view_name The unique name identifying the view.
|
* @param view_name The unique name identifying the view.
|
||||||
|
* @param finished Whether the view is finished or not.
|
||||||
*
|
*
|
||||||
* @returns A pointer on the mapped view infos structure.
|
* @returns A pointer on the mapped view infos structure.
|
||||||
* @retval NULL if an error occurred.
|
* @retval NULL if an error occurred.
|
||||||
@ -249,7 +250,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
|
|||||||
* @since June 2016
|
* @since June 2016
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
*/
|
*/
|
||||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name);
|
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bool finished);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -444,42 +445,6 @@ int obi_select_line(Obiview_p view, index_t line_nb);
|
|||||||
int obi_select_lines(Obiview_p view, index_t* line_nbs);
|
int obi_select_lines(Obiview_p view, index_t* line_nbs);
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Saves a view, writing it in the view file.
|
|
||||||
*
|
|
||||||
* The view is written at the end of the view file, following the latest written view.
|
|
||||||
*
|
|
||||||
* @warning The view must be writable.
|
|
||||||
*
|
|
||||||
* @param view A pointer on the view.
|
|
||||||
*
|
|
||||||
* @returns A value indicating the success of the operation.
|
|
||||||
* @retval 0 if the operation was successfully completed.
|
|
||||||
* @retval -1 if an error occurred.
|
|
||||||
*
|
|
||||||
* @since February 2016
|
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
|
||||||
*/
|
|
||||||
int obi_save_view(Obiview_p view);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Closes an opened view.
|
|
||||||
*
|
|
||||||
* @warning Uses obi_save_and_close_view() to automatically save the view if it's not already saved in the view file.
|
|
||||||
*
|
|
||||||
* @param view A pointer on the view.
|
|
||||||
*
|
|
||||||
* @returns A value indicating the success of the operation.
|
|
||||||
* @retval 0 if the operation was successfully completed.
|
|
||||||
* @retval -1 if an error occurred.
|
|
||||||
*
|
|
||||||
* @since February 2016
|
|
||||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
|
||||||
*/
|
|
||||||
int obi_close_view(Obiview_p view);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Closes an opened view, and saves it if it is not read-only (meaning it is not already saved in the view file).
|
* @brief Closes an opened view, and saves it if it is not read-only (meaning it is not already saved in the view file).
|
||||||
*
|
*
|
||||||
|
@ -1,16 +1,22 @@
|
|||||||
/*
|
/****************************************************************************
|
||||||
* sse_banded_LCS_alignment.c
|
* LCS alignment of two sequences *
|
||||||
*
|
****************************************************************************/
|
||||||
* Created on: 7 nov. 2012
|
|
||||||
* Author: celine mercier
|
/**
|
||||||
|
* @file sse_banded_LCS_alignment.c
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
* @date November 7th 2012
|
||||||
|
* @brief Functions handling the alignment of two sequences to compute their Longest Common Subsequence.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
#include "obierrno.h"
|
#include "obierrno.h"
|
||||||
#include "obidebug.h"
|
#include "obidebug.h"
|
||||||
@ -24,6 +30,231 @@
|
|||||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
*
|
||||||
|
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||||
|
*
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function printing a 128-bit register as eight 16-bit integers.
|
||||||
|
*
|
||||||
|
* @param r The register to print.
|
||||||
|
*
|
||||||
|
* @author Eric Coissac (eric.coissac@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static void printreg(__m128i r);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function extracting a 16-bit integer from a 128-bit register.
|
||||||
|
*
|
||||||
|
* @param r The register to read.
|
||||||
|
* @param p The position at which the integer should be read (between 0 and 7).
|
||||||
|
*
|
||||||
|
* @returns The extracted integer.
|
||||||
|
*
|
||||||
|
* @author Eric Coissac (eric.coissac@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
static inline int extract_reg(__m128i r, int p);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function aligning two sequences, computing the lengths of their Longest Common Subsequence and of their alignment.
|
||||||
|
*
|
||||||
|
* @warning The first argument (seq1) must correspond to the longest sequence.
|
||||||
|
*
|
||||||
|
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||||
|
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||||
|
* @param l1 The length of the first sequence.
|
||||||
|
* @param l2 The length of the second sequence.
|
||||||
|
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
|
||||||
|
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
|
||||||
|
* @param address A pointer, aligned on a 16 bits boundary, on the int array where the initial values for the alignment length are stored,
|
||||||
|
* as prepared for the alignment by initializeAddressWithGaps().
|
||||||
|
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||||
|
* @param ali_length A pointer on the int where the alignment length will be stored.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function aligning two sequences, computing the length of their Longest Common Subsequence (and not the alignment length).
|
||||||
|
*
|
||||||
|
* @warning The first argument (seq1) must correspond to the longest sequence.
|
||||||
|
*
|
||||||
|
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||||
|
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||||
|
* @param l1 The length of the first sequence.
|
||||||
|
* @param l2 The length of the second sequence.
|
||||||
|
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
|
||||||
|
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
|
||||||
|
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function calculating the length of the left band for the banded alignment.
|
||||||
|
*
|
||||||
|
* @param lmax The length of the longest sequence to align.
|
||||||
|
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
|
||||||
|
*
|
||||||
|
* @returns The length of the left band.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int calculateLeftBandLength(int lmax, int LCSmin);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function calculating the length of the right band for the banded alignment.
|
||||||
|
*
|
||||||
|
* @param lmin The length of the shortest sequence to align.
|
||||||
|
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
|
||||||
|
*
|
||||||
|
* @returns The length of the right band.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int calculateRightBandLength(int lmin, int LCSmin);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function calculating the length of the complete band for the banded alignment.
|
||||||
|
*
|
||||||
|
* @param bandLengthRight The length of the right band for the banded alignment, as computed by calculateRightBandLength().
|
||||||
|
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
|
||||||
|
*
|
||||||
|
* @returns The length of the complete band.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function calculating the size to allocate for the int array where the alignment length will be stored in the matrix.
|
||||||
|
*
|
||||||
|
* @param maxLen The length of the longest sequence to align.
|
||||||
|
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
|
||||||
|
*
|
||||||
|
* @returns The size to allocate in bytes.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int calculateSizeToAllocate(int maxLen, int LCSmin);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function initializing the int array corresponding to a sequence to align with default values.
|
||||||
|
*
|
||||||
|
* @param seq The int array corresponding to the sequence to align, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||||
|
* @param size The number of positions to initialize.
|
||||||
|
* @param iniValue The value that the positions should be initialized to.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
void iniSeq(int16_t* seq, int size, int16_t iniValue);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function building the int array corresponding to a sequence to align.
|
||||||
|
*
|
||||||
|
* Each nucleotide is stored as a short int (int16_t).
|
||||||
|
*
|
||||||
|
* @param seq A pointer on the allocated int array.
|
||||||
|
* @param s A pointer on the character string corresponding to the sequence.
|
||||||
|
* @param l The length of the sequence.
|
||||||
|
* @param reverse A boolean indicating whether the sequence should be written reversed
|
||||||
|
* (for the second sequence to align).
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
void putSeqInSeq(int16_t* seq, char* s, int l, bool reverse);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function building the int array corresponding to an obiblob containing a sequence.
|
||||||
|
*
|
||||||
|
* Each nucleotide is stored as a short int (int16_t).
|
||||||
|
*
|
||||||
|
* @param seq A pointer on the allocated int array.
|
||||||
|
* @param b A pointer on the obiblob containing the sequence.
|
||||||
|
* @param l The length of the (decoded) sequence.
|
||||||
|
* @param reverse A boolean indicating whether the sequence should be written reversed
|
||||||
|
* (for the second sequence to align).
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
void putBlobInSeq(int16_t* seq, Obi_blob_p b, int l, bool reverse);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function preparing an int array with the initial values for the alignment lengths before the alignment.
|
||||||
|
*
|
||||||
|
* The int array containing the initial alignment lengths (corresponding to the first line of the diagonalized band of the alignment matrix)
|
||||||
|
* needs to be initialized with external gap lengths before the alignment.
|
||||||
|
*
|
||||||
|
* @param address A pointer, aligned on a 16-byte boundary, on the int array where the initial values for the alignment length are to be stored.
|
||||||
|
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
|
||||||
|
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
|
||||||
|
* @param lmax The length of the longest sequence to align.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int lmax);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function aligning two sequences, computing the lengths of their Longest Common Subsequence and of their alignment.
|
||||||
|
*
|
||||||
|
* @warning The first argument (seq1) must correspond to the longest sequence.
|
||||||
|
*
|
||||||
|
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||||
|
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||||
|
* @param l1 The length of the first sequence.
|
||||||
|
* @param l2 The length of the second sequence.
|
||||||
|
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||||
|
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
||||||
|
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||||
|
* @param address A pointer, aligned on a 16-byte boundary, on an allocated int array where the initial values for the alignment length will be stored.
|
||||||
|
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
|
||||||
|
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||||
|
* @param ali_length A pointer on the int where the alignment length will be stored.
|
||||||
|
*
|
||||||
|
* @returns The alignment score (normalized according to the parameters).
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/************************************************************************
|
||||||
|
*
|
||||||
|
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||||
|
*
|
||||||
|
************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
static void printreg(__m128i r)
|
static void printreg(__m128i r)
|
||||||
{
|
{
|
||||||
int16_t a0,a1,a2,a3,a4,a5,a6,a7;
|
int16_t a0,a1,a2,a3,a4,a5,a6,a7;
|
||||||
@ -61,7 +292,6 @@ static inline int extract_reg(__m128i r, int p)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// TODO warning on length order
|
|
||||||
void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length)
|
void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length)
|
||||||
{
|
{
|
||||||
register int j;
|
register int j;
|
||||||
@ -287,7 +517,6 @@ void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// TODO warning on length order
|
|
||||||
void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length)
|
void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length)
|
||||||
{
|
{
|
||||||
register int j;
|
register int j;
|
||||||
@ -446,8 +675,7 @@ int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// TODO that's gonna be fun to doc
|
int calculateSizeToAllocate(int maxLen, int LCSmin)
|
||||||
int calculateSizeToAllocate(int maxLen, int minLen, int LCSmin)
|
|
||||||
{
|
{
|
||||||
int size;
|
int size;
|
||||||
|
|
||||||
@ -522,13 +750,13 @@ void putBlobInSeq(int16_t* seq, Obi_blob_p b, int l, bool reverse)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int l1)
|
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int lmax)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int address_00, x_address_10, address_01, address_01_shifted;
|
int address_00, x_address_10, address_01, address_01_shifted;
|
||||||
int numberOfRegistersPerLine;
|
int numberOfRegistersPerLine;
|
||||||
int bm;
|
int bm;
|
||||||
int value=INT16_MAX-l1;
|
int value=INT16_MAX-lmax;
|
||||||
|
|
||||||
numberOfRegistersPerLine = bandLengthTotal / 8;
|
numberOfRegistersPerLine = bandLengthTotal / 8;
|
||||||
bm = bandLengthLeft%2;
|
bm = bandLengthLeft%2;
|
||||||
@ -556,7 +784,6 @@ void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLe
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// TODO warning on length order
|
|
||||||
double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length)
|
double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length)
|
||||||
{
|
{
|
||||||
double id;
|
double id;
|
||||||
@ -610,10 +837,14 @@ double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool n
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
// PUBLIC FUNCTIONS
|
/**********************************************************************
|
||||||
|
*
|
||||||
|
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
|
||||||
|
*
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool similarity_mode)
|
int calculateLCSmin(int lmax, int lmin, double threshold, bool normalize, int reference, bool similarity_mode)
|
||||||
{
|
{
|
||||||
int LCSmin;
|
int LCSmin;
|
||||||
|
|
||||||
@ -622,16 +853,16 @@ int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int refere
|
|||||||
if (normalize)
|
if (normalize)
|
||||||
{
|
{
|
||||||
if (reference == MINLEN)
|
if (reference == MINLEN)
|
||||||
LCSmin = threshold*l2;
|
LCSmin = threshold*lmin;
|
||||||
else // ref = maxlen or alilen
|
else // ref = maxlen or alilen
|
||||||
LCSmin = threshold*l1;
|
LCSmin = threshold*lmax;
|
||||||
}
|
}
|
||||||
else if (similarity_mode)
|
else if (similarity_mode)
|
||||||
LCSmin = threshold;
|
LCSmin = threshold;
|
||||||
else if (reference == MINLEN) // not similarity_mode
|
else if (reference == MINLEN) // not similarity_mode
|
||||||
LCSmin = l2 - threshold;
|
LCSmin = lmin - threshold;
|
||||||
else // not similarity_mode and ref = maxlen or alilen
|
else // not similarity_mode and ref = maxlen or alilen
|
||||||
LCSmin = l1 - threshold;
|
LCSmin = lmax - threshold;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
LCSmin = 0;
|
LCSmin = 0;
|
||||||
@ -669,6 +900,14 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
|
|||||||
lmin = l1;
|
lmin = l1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check that the sequences are not greater than what can be aligned using the 16 bits registers (as the LCS and alignment lengths are kept on 16 bits)
|
||||||
|
if (lmax > SHRT_MAX)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_ALIGN_ERROR);
|
||||||
|
obidebug(1, "\nError: can not align sequences longer than %d (as the LCS and alignment lengths are kept on 16 bits)", SHRT_MAX);
|
||||||
|
return 0; // TODO DOUBLE_MIN to flag error
|
||||||
|
}
|
||||||
|
|
||||||
// If the score is expressed as a normalized distance, get the corresponding identity
|
// If the score is expressed as a normalized distance, get the corresponding identity
|
||||||
if (!similarity_mode && normalize)
|
if (!similarity_mode && normalize)
|
||||||
threshold = 1.0 - threshold;
|
threshold = 1.0 - threshold;
|
||||||
@ -679,7 +918,7 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
|
|||||||
// Allocate space for matrix band if the alignment length must be computed
|
// Allocate space for matrix band if the alignment length must be computed
|
||||||
if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
|
if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
|
||||||
{
|
{
|
||||||
sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
|
sizeToAllocateForBand = calculateSizeToAllocate(lmax, LCSmin);
|
||||||
address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
|
address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
|
||||||
if (address == NULL)
|
if (address == NULL)
|
||||||
{
|
{
|
||||||
@ -764,6 +1003,14 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
|
|||||||
lmin = l1;
|
lmin = l1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check that the sequences are not greater than what can be aligned using the 16 bits registers (as the LCS and alignment lengths are kept on 16 bits)
|
||||||
|
if (lmax > SHRT_MAX)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_ALIGN_ERROR);
|
||||||
|
obidebug(1, "\nError: can not align sequences longer than %d (as the LCS and alignment lengths are kept on 16 bits)", SHRT_MAX);
|
||||||
|
return 0; // TODO DOUBLE_MIN to flag error
|
||||||
|
}
|
||||||
|
|
||||||
// If the score is expressed as a normalized distance, get the corresponding identity
|
// If the score is expressed as a normalized distance, get the corresponding identity
|
||||||
if (!similarity_mode && normalize)
|
if (!similarity_mode && normalize)
|
||||||
threshold = 1.0 - threshold;
|
threshold = 1.0 - threshold;
|
||||||
@ -774,13 +1021,13 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
|
|||||||
// Allocate space for matrix band if the alignment length must be computed
|
// Allocate space for matrix band if the alignment length must be computed
|
||||||
if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
|
if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
|
||||||
{
|
{
|
||||||
sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
|
sizeToAllocateForBand = calculateSizeToAllocate(lmax, LCSmin);
|
||||||
address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
|
address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
|
||||||
if (address == NULL)
|
if (address == NULL)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_MALLOC_ERROR);
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
obidebug(1, "\nError getting a memory address aligned on 16 bytes boundary");
|
obidebug(1, "\nError getting a memory address aligned on a 16 bits boundary");
|
||||||
return 0; // TODO DOUBLE_MIN
|
return 0; // TODO DOUBLE_MIN to flag error
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,10 +1,15 @@
|
|||||||
/*
|
/****************************************************************************
|
||||||
* sse_banded_LCS_alignment.h
|
* LCS alignment of two sequences header file *
|
||||||
*
|
****************************************************************************/
|
||||||
* Created on: november 29, 2012
|
|
||||||
* Author: mercier
|
/**
|
||||||
|
* @file sse_banded_LCS_alignment.h
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
* @date November 7th 2012
|
||||||
|
* @brief Header file for the functions handling the alignment of two sequences to compute their Longest Common Subsequence.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#ifndef SSE_BANDED_LCS_ALIGNMENT_H_
|
#ifndef SSE_BANDED_LCS_ALIGNMENT_H_
|
||||||
#define SSE_BANDED_LCS_ALIGNMENT_H_
|
#define SSE_BANDED_LCS_ALIGNMENT_H_
|
||||||
|
|
||||||
@ -15,13 +20,97 @@
|
|||||||
#include "obiblob.h"
|
#include "obiblob.h"
|
||||||
|
|
||||||
|
|
||||||
#define ALILEN (0) // TODO enum
|
/**
|
||||||
|
* @brief Macros for reference lengths to use when aligning.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Eric Coissac (eric.coissac@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
#define ALILEN (0)
|
||||||
#define MAXLEN (1)
|
#define MAXLEN (1)
|
||||||
#define MINLEN (2)
|
#define MINLEN (2)
|
||||||
|
|
||||||
// TODO doc
|
|
||||||
int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool lcsmode);
|
/**
|
||||||
|
* @brief Function calculating the minimum length of the Longest Common Subsequence between two sequences to be above a chosen score threshold.
|
||||||
|
*
|
||||||
|
* @warning The first argument (lmax) must correspond to the length of the longest sequence.
|
||||||
|
*
|
||||||
|
* @param lmax The length of the longest sequence to align.
|
||||||
|
* @param lmin The length of the shortest sequence to align.
|
||||||
|
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||||
|
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||||
|
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||||
|
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||||
|
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||||
|
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||||
|
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
|
||||||
|
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||||
|
*
|
||||||
|
* @returns The minimum length of the Longest Common Subsequence between two sequences to be above the chosen score threshold.
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int calculateLCSmin(int lmax, int lmin, double threshold, bool normalize, int reference, bool similarity_mode);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function aligning two sequences.
|
||||||
|
*
|
||||||
|
* The alignment algorithm is a banded global alignment algorithm, a modified version of the classical Needleman and Wunsch algorithm,
|
||||||
|
* and uses indices based on the length of the Longest Common Subsequence between the two sequences.
|
||||||
|
*
|
||||||
|
* Note: the sequences do not need to be ordered (e.g. with the longest sequence as first argument).
|
||||||
|
*
|
||||||
|
* @param seq1 A pointer on the character string corresponding to the first sequence.
|
||||||
|
* @param seq2 A pointer on the character string corresponding to the second sequence.
|
||||||
|
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||||
|
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||||
|
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||||
|
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||||
|
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||||
|
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||||
|
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
|
||||||
|
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||||
|
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||||
|
* @param ali_length A pointer on the int where the alignment length will be stored.
|
||||||
|
*
|
||||||
|
* @returns The alignment score (normalized according to the parameters).
|
||||||
|
*
|
||||||
|
* @since 2012
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
|
double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Function aligning two sequences encoded in obiblobs.
|
||||||
|
*
|
||||||
|
* The alignment algorithm is a banded global alignment algorithm, a modified version of the classical Needleman and Wunsch algorithm,
|
||||||
|
* and uses indices based on the length of the Longest Common Subsequence between the two sequences.
|
||||||
|
*
|
||||||
|
* Note: the obiblobs do not need to be ordered (e.g. with the obiblob containing the longest sequence as first argument).
|
||||||
|
*
|
||||||
|
* @param seq1 A pointer on the blob containing the first sequence.
|
||||||
|
* @param seq2 A pointer on the blob containing the second sequence.
|
||||||
|
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||||
|
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||||
|
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||||
|
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||||
|
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||||
|
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||||
|
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
|
||||||
|
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||||
|
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||||
|
* @param ali_length A pointer on the int where the alignment length will be stored.
|
||||||
|
*
|
||||||
|
* @returns The alignment score (normalized according to the parameters).
|
||||||
|
*
|
||||||
|
* @since December 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
|
double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -8,8 +8,6 @@
|
|||||||
#include "obidmscolumn.h"
|
#include "obidmscolumn.h"
|
||||||
#include "obiview.h"
|
#include "obiview.h"
|
||||||
|
|
||||||
//#include "../libutils/utilities.h"
|
|
||||||
//#include "../libfasta/sequence.h"
|
|
||||||
|
|
||||||
|
|
||||||
inline static uchar_v hash4m128(uchar_v frag)
|
inline static uchar_v hash4m128(uchar_v frag)
|
||||||
@ -242,7 +240,7 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t se
|
|||||||
|
|
||||||
fprintf(stderr,"Building kmer tables...");
|
fprintf(stderr,"Building kmer tables...");
|
||||||
|
|
||||||
seq_count = (seq_col->header)->lines_used;
|
seq_count = (view->infos)->line_count;
|
||||||
|
|
||||||
// Allocate memory for the table structure
|
// Allocate memory for the table structure
|
||||||
ktable = (Kmer_table_p) malloc(sizeof(Kmer_table_t) * seq_count);
|
ktable = (Kmer_table_p) malloc(sizeof(Kmer_table_t) * seq_count);
|
||||||
@ -267,6 +265,44 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t se
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Builds the kmer tables of two sequence columns and returns them as one
 * contiguous array: the tables of the first column come first, followed by
 * the tables of the second column.
 *
 * The number of tables built for each column is the line count of the
 * corresponding view ((view->infos)->line_count).
 *
 * @param view1    The view containing the first sequence column.
 * @param seq1_col The first sequence column.
 * @param seq1_idx Passed through to hash_seq_column() for the first column.
 * @param view2    The view containing the second sequence column.
 * @param seq2_col The second sequence column.
 * @param seq2_idx Passed through to hash_seq_column() for the second column.
 *
 * @returns The concatenated kmer tables.
 * @retval NULL if one of the tables could not be built or if the memory for
 *         the concatenated array could not be obtained. Nothing is left
 *         allocated in that case.
 */
Kmer_table_p hash_two_seq_columns(Obiview_p view1, OBIDMS_column_p seq1_col, index_t seq1_idx,
                                  Obiview_p view2, OBIDMS_column_p seq2_col, index_t seq2_idx)
{
	size_t       seq1_count;
	size_t       seq2_count;
	size_t       total_count;
	Kmer_table_p ktable1;
	Kmer_table_p ktable2;
	Kmer_table_p ktable;

	seq1_count  = (view1->infos)->line_count;
	seq2_count  = (view2->infos)->line_count;
	total_count = seq1_count + seq2_count;

	// Build the two tables then concatenate them
	ktable1 = hash_seq_column(view1, seq1_col, seq1_idx);
	if (ktable1 == NULL)
		return NULL;

	ktable2 = hash_seq_column(view2, seq2_col, seq2_idx);
	if (ktable2 == NULL)
	{
		// The first table was built successfully: release it instead of leaking it.
		// NOTE(review): assumes free_kmer_tables() releases the entries and the array itself - confirm.
		free_kmer_tables(ktable1, seq1_count);
		return NULL;
	}

	// Realloc to hold the 2 tables
	ktable = realloc(ktable1, sizeof(Kmer_table_t) * total_count);
	if (ktable == NULL)
	{
		// realloc() leaves the original block untouched when it fails, so
		// ktable1 has to be released here as well. A zero-size request is the
		// exception: the block may already have been freed by realloc().
		if (total_count > 0)
			free_kmer_tables(ktable1, seq1_count);
		free_kmer_tables(ktable2, seq2_count);
		return NULL;
	}

	// Concatenate (shallow copy of the second table's entries)
	memcpy(ktable+seq1_count, ktable2, sizeof(Kmer_table_t) * seq2_count);

	// Free the copied array only: its entries now belong to the concatenated table
	free(ktable2);

	return ktable;
}
|
||||||
|
|
||||||
|
|
||||||
void free_kmer_tables(Kmer_table_p ktable, size_t count)
|
void free_kmer_tables(Kmer_table_p ktable, size_t count)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
|
@ -18,7 +18,11 @@ typedef struct {
|
|||||||
} Kmer_table_t, *Kmer_table_p;
|
} Kmer_table_t, *Kmer_table_p;
|
||||||
|
|
||||||
|
|
||||||
|
// TODO doc
|
||||||
|
|
||||||
Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t seq_idx);
|
Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t seq_idx);
|
||||||
|
Kmer_table_p hash_two_seq_columns(Obiview_p view1, OBIDMS_column_p seq1_col, index_t seq1_idx,
|
||||||
|
Obiview_p view2, OBIDMS_column_p seq2_col, index_t seq2_idx);
|
||||||
void align_filters(Kmer_table_p ktable, Obi_blob_p seq1, Obi_blob_p seq2, index_t idx1, index_t idx2, double threshold, bool normalize, int reference, bool similarity_mode, double* score, int* LCSmin, bool can_be_identical);
|
void align_filters(Kmer_table_p ktable, Obi_blob_p seq1, Obi_blob_p seq2, index_t idx1, index_t idx2, double threshold, bool normalize, int reference, bool similarity_mode, double* score, int* LCSmin, bool can_be_identical);
|
||||||
void free_kmer_tables(Kmer_table_p ktable, size_t count);
|
void free_kmer_tables(Kmer_table_p ktable, size_t count);
|
||||||
|
|
||||||
|
190
src/utils.c
190
src/utils.c
@ -116,3 +116,193 @@ void* obi_get_memory_aligned_on_16(int size, int* shift)
|
|||||||
return (memory);
|
return (memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A generic implementation of binary search for the Linux kernel
|
||||||
|
*
|
||||||
|
* Copyright (C) 2008-2009 Ksplice, Inc.
|
||||||
|
* Author: Tim Abbott <tabbott@ksplice.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License as
|
||||||
|
* published by the Free Software Foundation; version 2.
|
||||||
|
*/
|
||||||
|
/*
 * Binary search in a sorted array, with an extra user_data pointer handed to
 * the comparison function on every call.
 *
 * key:       the element to look for (first argument given to cmp).
 * base:      the array, sorted in ascending order according to cmp.
 * num:       the number of elements in the array.
 * size:      the size in bytes of one element.
 * user_data: opaque pointer forwarded unchanged to cmp.
 * cmp:       returns < 0 if key sorts before elt, > 0 if it sorts after,
 *            0 if they match.
 *
 * Returns a pointer on a matching element, or NULL if there is none
 * (always NULL when num is 0, in which case cmp is never called).
 */
void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data,
                        int (*cmp)(const void *key, const void *elt, const void* user_data))
{
	// Byte pointer for the address computations: arithmetic on void* is a
	// GNU extension, not ISO C.
	const char* first = (const char*) base;
	const char* elt;
	size_t      start = 0;
	size_t      end = num;
	size_t      mid;
	int         result;

	while (start < end)
	{
		// Written this way so that start + end can not overflow
		mid = start + (end - start) / 2;
		elt = first + mid * size;

		result = cmp(key, elt, user_data);
		if (result < 0)
			end = mid;
		else if (result > 0)
			start = mid + 1;
		else
			return (void*) elt;
	}

	return NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 1992, 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Smallest of a and b. Each argument may be evaluated twice. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||||
|
|
||||||
|
/*
 * Exchange the contents of the buffers parmi and parmj, n bytes in total,
 * moving one TYPE-sized element at a time.
 * The loop is a do/while, so one element is always exchanged even if
 * n / sizeof(TYPE) is 0.
 * Expands to a braced block, not to an expression: no value is produced.
 */
#define swapcode(TYPE, parmi, parmj, n) {			\
	long count = (n) / sizeof (TYPE);			\
	register TYPE *left = (TYPE *) (parmi);			\
	register TYPE *right = (TYPE *) (parmj);		\
	do {							\
		register TYPE tmp = *left;			\
		*left++ = *right;				\
		*right++ = tmp;					\
	} while (--count > 0);					\
}
|
||||||
|
|
||||||
|
/*
 * Choose how elements are exchanged and store the choice in the `swaptype`
 * variable of the calling scope:
 *   0: a is aligned on sizeof(long) and es == sizeof(long)
 *   1: a is aligned on sizeof(long) and es is another multiple of sizeof(long)
 *   2: anything else
 * The macro arguments are parenthesized so that expressions can be passed.
 * The trailing ';' is part of the macro (kept for existing call sites).
 * NOTE(review): the address is obtained by subtracting a null pointer, which
 * is not strictly conforming C - consider a uintptr_t cast.
 */
#define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \
	(es) % sizeof(long) ? 2 : (es) == sizeof(long) ? 0 : 1;
|
||||||
|
|
||||||
|
static __inline void
|
||||||
|
swapfunc(char *a, char *b, int n, int swaptype)
|
||||||
|
{
|
||||||
|
if (swaptype <= 1)
|
||||||
|
swapcode(long, a, b, n)
|
||||||
|
else
|
||||||
|
swapcode(char, a, b, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Exchange the two elements at a and b.  Relies on the locals `es` (element
 * size) and `swaptype` of the calling function.
 *
 * When swaptype is 0 the element is moved with a single long assignment;
 * otherwise the work is delegated to swapfunc().
 *
 * The body is wrapped in do { } while (0) so the macro behaves as exactly one
 * statement wherever it is used, and the temporary has a name unlikely to
 * shadow an identifier appearing in the arguments.
 */
#define swap(a, b)						\
	do {							\
		if (swaptype == 0) {				\
			long swap_tmp_ = *(long *)(a);		\
			*(long *)(a) = *(long *)(b);		\
			*(long *)(b) = swap_tmp_;		\
		} else {					\
			swapfunc((a), (b), es, swaptype);	\
		}						\
	} while (0)
|
||||||
|
|
||||||
|
/*
 * Exchange the n-byte regions at a and b when n is positive; do nothing
 * otherwise.  Relies on the local `swaptype` of the calling function.
 * Wrapped in do { } while (0) so that a following `else` in the caller cannot
 * bind to the macro's internal `if`.
 */
#define vecswap(a, b, n)					\
	do {							\
		if ((n) > 0)					\
			swapfunc((a), (b), (n), swaptype);	\
	} while (0)
|
||||||
|
|
||||||
|
/*
 * Return whichever of a, b and c holds the median value according to
 * cmp(x, y, user_data).  user_data is forwarded untouched to every
 * comparison.
 */
static __inline char *
med3(char *a, char *b, char *c, const void *user_data, int (*cmp)(const void *, const void *, const void *))
{
	if (cmp(a, b, user_data) < 0) {
		/* a < b: the median is b unless c falls below it. */
		if (cmp(b, c, user_data) < 0)
			return b;
		if (cmp(a, c, user_data) < 0)
			return c;
		return a;
	}

	/* a >= b: the median is b unless c falls above it. */
	if (cmp(b, c, user_data) > 0)
		return b;
	if (cmp(a, c, user_data) < 0)
		return a;
	return c;
}
|
||||||
|
|
||||||
|
/*
 * Sort, in place, the n elements of es bytes each that start at aa.
 *
 * Elements are ordered with cmp(x, y, user_data): a negative, zero or
 * positive result means x sorts before, equal to, or after y.  user_data is
 * passed through to every comparison unchanged.
 *
 * Byte counts are kept in size_t rather than int so that they cannot
 * overflow on large arrays, and the function recurses only into the smaller
 * partition (iterating on the larger one) to keep the recursion shallow.
 */
void
qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *))
{
	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
	size_t d, span, left, right;
	int r, swaptype, swap_cnt;
	char *a = aa;

	/*
	 * Fewer than two elements are already sorted.  A zero element size
	 * would keep the partition pointers from ever advancing.
	 */
	if (n < 2 || es == 0)
		return;

loop:
	SWAPINIT(a, es);
	swap_cnt = 0;

	if (n < 7) {
		/* Small array: insertion sort. */
		for (pm = a + es; pm < a + n * es; pm += es) {
			for (pl = pm; pl > a && cmp(pl - es, pl, user_data) > 0; pl -= es) {
				swap(pl, pl - es);
			}
		}
		return;
	}

	/*
	 * Pivot choice: the middle element for n == 7, the median of three
	 * for larger arrays, and the median of three medians for n > 40.
	 */
	pm = a + (n / 2) * es;
	if (n > 7) {
		pl = a;
		pn = a + (n - 1) * es;
		if (n > 40) {
			d = (n / 8) * es;
			pl = med3(pl, pl + d, pl + 2 * d, user_data, cmp);
			pm = med3(pm - d, pm, pm + d, user_data, cmp);
			pn = med3(pn - 2 * d, pn - d, pn, user_data, cmp);
		}
		pm = med3(pl, pm, pn, user_data, cmp);
	}
	/* Park the pivot in the first slot. */
	swap(a, pm);

	/*
	 * Partition.  Elements equal to the pivot are collected at both ends
	 * ([a, pa) and (pd, end)), smaller ones end up in [pa, pb) and larger
	 * ones in (pc, pd].
	 */
	pa = pb = a + es;
	pc = pd = a + (n - 1) * es;
	for (;;) {
		while (pb <= pc && (r = cmp(pb, a, user_data)) <= 0) {
			if (r == 0) {
				swap_cnt = 1;
				swap(pa, pb);
				pa += es;
			}
			pb += es;
		}
		while (pb <= pc && (r = cmp(pc, a, user_data)) >= 0) {
			if (r == 0) {
				swap_cnt = 1;
				swap(pc, pd);
				pd -= es;
			}
			pc -= es;
		}
		if (pb > pc)
			break;
		swap(pb, pc);
		swap_cnt = 1;
		pb += es;
		pc -= es;
	}

	if (swap_cnt == 0) {
		/* No element was moved while partitioning: switch to insertion sort. */
		for (pm = a + es; pm < a + n * es; pm += es) {
			for (pl = pm; pl > a && cmp(pl - es, pl, user_data) > 0; pl -= es) {
				swap(pl, pl - es);
			}
		}
		return;
	}

	/* Move the pivot-equal runs from the two ends into the middle. */
	pn = a + n * es;
	span = MIN((size_t)(pa - a), (size_t)(pb - pa));
	vecswap(a, pb - span, span);
	span = MIN((size_t)(pd - pc), (size_t)(pn - pd) - es);
	vecswap(pb, pn - span, span);

	/* Byte sizes of the "smaller than" and "larger than" partitions. */
	left = (size_t)(pb - pa);
	right = (size_t)(pd - pc);

	if (left <= right) {
		/* Recurse on the left partition, then iterate on the right one. */
		if (left > es)
			qsort_user_data(a, left / es, es, user_data, cmp);
		if (right > es) {
			a = pn - right;
			n = right / es;
			goto loop;
		}
	} else {
		/* Recurse on the right partition, then iterate on the left one. */
		if (right > es)
			qsort_user_data(pn - right, right / es, es, user_data, cmp);
		if (left > es) {
			n = left / es;
			goto loop;
		}
	}
}
|
||||||
|
|
||||||
|
37
src/utils.h
37
src/utils.h
@ -74,4 +74,41 @@ char* obi_format_date(time_t date);
|
|||||||
void* obi_get_memory_aligned_on_16(int size, int* shift);
|
void* obi_get_memory_aligned_on_16(int size, int* shift);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Version of binary search modified to allow the user to provide an
|
||||||
|
* additional pointer sent to the comparison function.
|
||||||
|
*
|
||||||
|
* @param key This is the pointer to the object that serves as key for the search, type-casted as a void*.
|
||||||
|
* @param base This is the pointer to the first object of the array where the search is performed, type-casted as a void*.
|
||||||
|
* @param num This is the number of elements in the array pointed by base.
|
||||||
|
* @param size This is the size in bytes of each element in the array.
|
||||||
|
* @param user_data This is an additional pointer passed to the comparison function.
|
||||||
|
* @param cmp This is the function that compares the key with an element, optionally using the additional pointer.
|
||||||
|
*
|
||||||
|
* @returns A pointer to an entry in the array that matches the search key.
|
||||||
|
* @retval NULL if key is not found.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author original code modified by Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data,
|
||||||
|
int (*cmp)(const void *key, const void *elt, const void* user_data));
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Version of quick sort modified to allow the user to provide an
|
||||||
|
* additional pointer sent to the comparison function.
|
||||||
|
*
|
||||||
|
* @param aa This is the pointer to the first element of the array to be sorted.
|
||||||
|
* @param n This is the number of elements in the array pointed to by aa.
|
||||||
|
* @param es This is the size in bytes of each element in the array.
|
||||||
|
* @param user_data This is an additional pointer passed to the comparison function.
|
||||||
|
* @param cmp This is the function that compares two elements, optionally using the additional pointer.
|
||||||
|
*
|
||||||
|
* @since January 2017
|
||||||
|
* @author original code modified by Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
void qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *));
|
||||||
|
|
||||||
|
|
||||||
#endif /* UTILS_H_ */
|
#endif /* UTILS_H_ */
|
||||||
|
Reference in New Issue
Block a user