Merged master fixed conflict.
This commit is contained in:
65
python/obitools3/commands/lcs.cfiles
Normal file
65
python/obitools3/commands/lcs.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/hashtable.h
|
||||
../../../src/hashtable.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obi_align.h
|
||||
../../../src/obi_align.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_blob.c
|
||||
../../../src/obidmscolumn_blob.h
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_qual.h
|
||||
../../../src/obidmscolumn_qual.c
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/sse_banded_LCS_alignment.h
|
||||
../../../src/sse_banded_LCS_alignment.c
|
||||
../../../src/uint8_indexer.h
|
||||
../../../src/uint8_indexer.c
|
||||
../../../src/upperband.h
|
||||
../../../src/upperband.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
@ -4,7 +4,8 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.obidms._obidms cimport OBIDMS # TODO cimport doesn't work
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
from obitools3.obidms.capi.obialign cimport obi_lcs_align_one_column
|
||||
from obitools3.obidms.capi.obialign cimport obi_lcs_align_one_column, \
|
||||
obi_lcs_align_two_columns
|
||||
|
||||
|
||||
import time
|
||||
@ -146,6 +147,13 @@ def addOptions(parser):
|
||||
default=False,
|
||||
help="Sequence counts are written in the output view. Default: they are not written.")
|
||||
|
||||
group.add_argument('--thread-count','-p', # TODO should probably be in a specific option group
|
||||
action="store", dest="align:threadcount",
|
||||
metavar='<THREAD COUNT>',
|
||||
default=1,
|
||||
type=int,
|
||||
help="Number of threads to use for the computation. Default: one.")
|
||||
|
||||
|
||||
cpdef align(str dms_n,
|
||||
str input_view_1_n, str output_view_n,
|
||||
@ -156,12 +164,13 @@ cpdef align(str dms_n,
|
||||
double threshold=0.0, bint normalize=True,
|
||||
int reference=0, bint similarity_mode=True,
|
||||
bint print_seq=False, bint print_count=False,
|
||||
comments="") :
|
||||
comments="",
|
||||
int thread_count=1) :
|
||||
|
||||
cdef OBIDMS d
|
||||
d = OBIDMS(dms_n)
|
||||
|
||||
# Align 1 column (2 columns not implemented yet)
|
||||
if input_view_2_n == "" and input_column_2_n == "" :
|
||||
if obi_lcs_align_one_column(d._pointer, \
|
||||
str2bytes(input_view_1_n), \
|
||||
str2bytes(input_column_1_n), \
|
||||
@ -171,6 +180,23 @@ cpdef align(str dms_n,
|
||||
str2bytes(comments), \
|
||||
print_seq, \
|
||||
print_count, \
|
||||
threshold, normalize, reference, similarity_mode,
|
||||
thread_count) < 0 :
|
||||
raise Exception("Error aligning sequences")
|
||||
else :
|
||||
if obi_lcs_align_two_columns(d._pointer, \
|
||||
str2bytes(input_view_1_n), \
|
||||
str2bytes(input_view_2_n), \
|
||||
str2bytes(input_column_1_n), \
|
||||
str2bytes(input_column_2_n), \
|
||||
str2bytes(input_elt_1_n), \
|
||||
str2bytes(input_elt_2_n), \
|
||||
str2bytes(id_column_1_n), \
|
||||
str2bytes(id_column_2_n), \
|
||||
str2bytes(output_view_n), \
|
||||
str2bytes(comments), \
|
||||
print_seq, \
|
||||
print_count, \
|
||||
threshold, normalize, reference, similarity_mode) < 0 :
|
||||
raise Exception("Error aligning sequences")
|
||||
|
||||
@ -199,7 +225,8 @@ def run(config):
|
||||
similarity_mode = config['align']['similarity'], \
|
||||
print_seq = config['align']['printseq'], \
|
||||
print_count = config['align']['printcount'], \
|
||||
comments = comments)
|
||||
comments = comments, \
|
||||
thread_count = config['align']['threadcount'])
|
||||
|
||||
print("Done.")
|
||||
|
||||
|
@ -97,8 +97,7 @@ def test_set_and_get(config, infos):
|
||||
return
|
||||
idx = random_int(config)
|
||||
value = infos['random_generator'][data_type](config)
|
||||
|
||||
if len(element_names) > 1 :
|
||||
if col.nb_elements_per_line > 1 :
|
||||
elt = random.choice(element_names)
|
||||
col[idx][elt] = value
|
||||
assert col[idx][elt] == value, "Set value != gotten value "+str(col[idx][elt])+" != "+str(value)
|
||||
@ -187,6 +186,7 @@ def create_random_column(config, infos) :
|
||||
elements_names = []
|
||||
for i in range(nb_elements_per_line) :
|
||||
elements_names.append(random_unique_element_name(config, infos))
|
||||
elements_names = random.choice([None, elements_names])
|
||||
name = random_unique_name(infos)
|
||||
infos['view'].add_column(name,
|
||||
alias=alias,
|
||||
@ -359,6 +359,8 @@ def run(config):
|
||||
|
||||
print("Initializing the DMS and the first view...")
|
||||
|
||||
shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
|
||||
|
||||
ini_dms_and_first_view(config, infos)
|
||||
print_test(config, repr(infos['view']))
|
||||
|
||||
|
@ -7,7 +7,8 @@ from .capi.obidms cimport obi_dms, \
|
||||
|
||||
from .capi.obidmscolumn cimport obi_close_column, \
|
||||
OBIDMS_column_p, \
|
||||
OBIDMS_column_header_p
|
||||
OBIDMS_column_header_p, \
|
||||
obi_get_elements_names
|
||||
|
||||
from .capi.obiutils cimport obi_format_date
|
||||
|
||||
@ -73,7 +74,7 @@ from .capi.obiview cimport Obiview_p, \
|
||||
DEFINITION_COLUMN, \
|
||||
QUALITY_COLUMN
|
||||
|
||||
from libc.stdlib cimport malloc
|
||||
from libc.stdlib cimport malloc, free
|
||||
|
||||
|
||||
|
||||
@ -100,17 +101,17 @@ cdef class OBIDMS_column :
|
||||
def __getitem__(self, index_t line_nb):
|
||||
return self.get_line(line_nb)
|
||||
|
||||
def __len__(self):
|
||||
return self.lines_used
|
||||
def __len__(self): # TODO discuss
|
||||
return self._view.line_count
|
||||
|
||||
def __sizeof__(self):
|
||||
return ((self._pointer)[0].header.header_size + (self._pointer)[0].header.data_size)
|
||||
|
||||
def __iter__(self):
|
||||
def __iter__(self): # TODO discuss
|
||||
# Declarations
|
||||
cdef index_t line_nb
|
||||
# Yield each line
|
||||
for line_nb in range(self.lines_used):
|
||||
for line_nb in range(self._view.line_count):
|
||||
yield self.get_line(line_nb)
|
||||
|
||||
def __str__(self) :
|
||||
@ -138,7 +139,12 @@ cdef class OBIDMS_column :
|
||||
# elements_names property getter
|
||||
@property
|
||||
def elements_names(self):
|
||||
return (bytes2str(((self._pointer)[0].header).elements_names)).split(';')
|
||||
cdef char* elts_names_b
|
||||
cdef str elts_names
|
||||
elts_names_b = obi_get_elements_names((self._pointer)[0])
|
||||
elts_names = bytes2str(elts_names_b)
|
||||
free(<char*>elts_names_b)
|
||||
return elts_names.split(';')
|
||||
|
||||
# nb_elements_per_line property getter
|
||||
@property
|
||||
@ -160,11 +166,6 @@ cdef class OBIDMS_column :
|
||||
def version(self):
|
||||
return ((self._pointer)[0].header).version
|
||||
|
||||
# lines_used property getter
|
||||
@property
|
||||
def lines_used(self):
|
||||
return (self._pointer)[0].header.lines_used
|
||||
|
||||
# comments property getter
|
||||
@property
|
||||
def comments(self):
|
||||
@ -268,15 +269,6 @@ cdef class OBIDMS_column_line :
|
||||
######################################################################################################
|
||||
|
||||
|
||||
|
||||
|
||||
######################################################################################################
|
||||
|
||||
|
||||
|
||||
######################################################################################################
|
||||
|
||||
|
||||
cdef class OBIView_line :
|
||||
|
||||
def __init__(self, OBIView view, index_t line_nb) :
|
||||
@ -424,7 +416,7 @@ cdef class OBIDMS :
|
||||
cdef int i, j
|
||||
cdef str column_name
|
||||
|
||||
view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name))
|
||||
view_infos_p = obi_view_map_file(self._pointer, str2bytes(view_name), True)
|
||||
view_infos_d = {}
|
||||
view_infos_d["name"] = bytes2str(view_infos_p.name)
|
||||
view_infos_d["comments"] = bytes2str(view_infos_p.comments)
|
||||
|
@ -22,7 +22,7 @@ cdef class OBIDMS_column_str(OBIDMS_column):
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
|
||||
return result
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
@ -46,7 +46,7 @@ cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
|
||||
return result
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
@ -65,7 +65,7 @@ cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file.
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
|
@ -18,3 +18,4 @@ cdef class OBI_Taxonomy :
|
||||
cdef class OBI_Taxon :
|
||||
|
||||
cdef ecotx_t* _pointer
|
||||
cdef OBI_Taxonomy _tax
|
||||
|
@ -7,10 +7,10 @@ from .capi.obitaxonomy cimport obi_read_taxonomy, \
|
||||
obi_write_taxonomy, \
|
||||
obi_close_taxonomy, \
|
||||
obi_taxo_get_taxon_with_taxid, \
|
||||
obi_taxonomy_add_local_taxon, \
|
||||
obi_taxo_add_local_taxon, \
|
||||
obi_taxo_add_preferred_name_with_taxon, \
|
||||
ecotx_t
|
||||
|
||||
|
||||
from ._obidms cimport OBIDMS
|
||||
|
||||
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
||||
@ -42,7 +42,7 @@ cdef class OBI_Taxonomy :
|
||||
if taxon_p == NULL :
|
||||
raise Exception("Taxon not found")
|
||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||
return OBI_Taxon(taxon_capsule)
|
||||
return OBI_Taxon(taxon_capsule, self)
|
||||
else :
|
||||
raise Exception("Not implemented")
|
||||
|
||||
@ -60,7 +60,7 @@ cdef class OBI_Taxonomy :
|
||||
for t in range(self._pointer.taxa.count):
|
||||
taxon_p = <ecotx_t*> (taxa+t)
|
||||
taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
|
||||
yield OBI_Taxon(taxon_capsule)
|
||||
yield OBI_Taxon(taxon_capsule, self)
|
||||
|
||||
|
||||
cpdef write(self, str prefix) :
|
||||
@ -70,7 +70,7 @@ cdef class OBI_Taxonomy :
|
||||
|
||||
cpdef int add_taxon(self, str name, str rank_name, int parent_taxid, int min_taxid=10000000) :
|
||||
cdef int taxid
|
||||
taxid = obi_taxonomy_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
|
||||
taxid = obi_taxo_add_local_taxon(self._pointer, str2bytes(name), str2bytes(rank_name), parent_taxid, min_taxid)
|
||||
if taxid < 0 :
|
||||
raise Exception("Error adding a new taxon to the taxonomy")
|
||||
else :
|
||||
@ -85,10 +85,11 @@ cdef class OBI_Taxonomy :
|
||||
|
||||
cdef class OBI_Taxon : # TODO dict subclass?
|
||||
|
||||
def __init__(self, object taxon_capsule) :
|
||||
def __init__(self, object taxon_capsule, OBI_Taxonomy tax) :
|
||||
self._pointer = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
|
||||
if self._pointer == NULL :
|
||||
raise Exception("Error reading the taxonomy")
|
||||
raise Exception("Error reading a taxon (NULL pointer)")
|
||||
self._tax = tax
|
||||
|
||||
# name property getter
|
||||
@property
|
||||
@ -115,12 +116,23 @@ cdef class OBI_Taxon : # TODO dict subclass?
|
||||
def parent(self):
|
||||
cdef object parent_capsule
|
||||
parent_capsule = PyCapsule_New(self._pointer.parent, NULL, NULL)
|
||||
return OBI_Taxon(parent_capsule)
|
||||
return OBI_Taxon(parent_capsule, self._tax)
|
||||
|
||||
# preferred name property getter and setter
|
||||
@property
|
||||
def preferred_name(self):
|
||||
if self._pointer.preferred_name != NULL :
|
||||
return bytes2str(self._pointer.preferred_name)
|
||||
@preferred_name.setter
|
||||
def preferred_name(self, str new_preferred_name) : # @DuplicatedSignature
|
||||
if (obi_taxo_add_preferred_name_with_taxon(self._tax._pointer, self._pointer, str2bytes(new_preferred_name)) < 0) :
|
||||
raise Exception("Error adding a new preferred name to a taxon")
|
||||
|
||||
def __repr__(self):
|
||||
d = {}
|
||||
d['taxid'] = self.taxid
|
||||
d['name'] = self.name
|
||||
d['preferred name'] = self.preferred_name
|
||||
d['parent'] = self.parent.taxid
|
||||
d['farest'] = self.farest
|
||||
return str(d)
|
||||
|
65
python/obitools3/obidms/capi/obialign.cfiles
Normal file
65
python/obitools3/obidms/capi/obialign.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../../src/bloom.h
|
||||
../../../../src/bloom.c
|
||||
../../../../src/char_str_indexer.h
|
||||
../../../../src/char_str_indexer.c
|
||||
../../../../src/crc64.h
|
||||
../../../../src/crc64.c
|
||||
../../../../src/dna_seq_indexer.h
|
||||
../../../../src/dna_seq_indexer.c
|
||||
../../../../src/encode.h
|
||||
../../../../src/encode.c
|
||||
../../../../src/hashtable.h
|
||||
../../../../src/hashtable.c
|
||||
../../../../src/murmurhash2.h
|
||||
../../../../src/murmurhash2.c
|
||||
../../../../src/obi_align.h
|
||||
../../../../src/obi_align.c
|
||||
../../../../src/obiavl.h
|
||||
../../../../src/obiavl.c
|
||||
../../../../src/obiblob_indexer.h
|
||||
../../../../src/obiblob_indexer.c
|
||||
../../../../src/obiblob.h
|
||||
../../../../src/obiblob.c
|
||||
../../../../src/obidebug.h
|
||||
../../../../src/obidms_taxonomy.h
|
||||
../../../../src/obidms_taxonomy.c
|
||||
../../../../src/obidms.h
|
||||
../../../../src/obidms.c
|
||||
../../../../src/obidmscolumn_blob.c
|
||||
../../../../src/obidmscolumn_blob.h
|
||||
../../../../src/obidmscolumn_bool.c
|
||||
../../../../src/obidmscolumn_bool.h
|
||||
../../../../src/obidmscolumn_char.c
|
||||
../../../../src/obidmscolumn_char.h
|
||||
../../../../src/obidmscolumn_float.c
|
||||
../../../../src/obidmscolumn_float.h
|
||||
../../../../src/obidmscolumn_idx.h
|
||||
../../../../src/obidmscolumn_idx.c
|
||||
../../../../src/obidmscolumn_int.c
|
||||
../../../../src/obidmscolumn_int.h
|
||||
../../../../src/obidmscolumn_qual.h
|
||||
../../../../src/obidmscolumn_qual.c
|
||||
../../../../src/obidmscolumn_seq.c
|
||||
../../../../src/obidmscolumn_seq.h
|
||||
../../../../src/obidmscolumn_str.c
|
||||
../../../../src/obidmscolumn_str.h
|
||||
../../../../src/obidmscolumn.h
|
||||
../../../../src/obidmscolumn.c
|
||||
../../../../src/obidmscolumndir.h
|
||||
../../../../src/obidmscolumndir.c
|
||||
../../../../src/obierrno.h
|
||||
../../../../src/obierrno.c
|
||||
../../../../src/obilittlebigman.h
|
||||
../../../../src/obilittlebigman.c
|
||||
../../../../src/obitypes.h
|
||||
../../../../src/obitypes.c
|
||||
../../../../src/obiview.h
|
||||
../../../../src/obiview.c
|
||||
../../../../src/sse_banded_LCS_alignment.h
|
||||
../../../../src/sse_banded_LCS_alignment.c
|
||||
../../../../src/uint8_indexer.h
|
||||
../../../../src/uint8_indexer.c
|
||||
../../../../src/upperband.h
|
||||
../../../../src/upperband.c
|
||||
../../../../src/utils.h
|
||||
../../../../src/utils.c
|
@ -18,5 +18,24 @@ cdef extern from "obi_align.h" nogil:
|
||||
double threshold,
|
||||
bint normalize,
|
||||
int reference,
|
||||
bint similarity_mode)
|
||||
bint similarity_mode,
|
||||
int thread_count)
|
||||
|
||||
|
||||
int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
const_char_p seq1_view_name,
|
||||
const_char_p seq2_view_name,
|
||||
const_char_p seq1_column_name,
|
||||
const_char_p seq2_column_name,
|
||||
const_char_p seq1_elt_name,
|
||||
const_char_p seq2_elt_name,
|
||||
const_char_p id1_column_name,
|
||||
const_char_p id2_column_name,
|
||||
const_char_p output_view_name,
|
||||
const_char_p output_view_comments,
|
||||
bint print_seq,
|
||||
bint print_count,
|
||||
double threshold,
|
||||
bint normalize,
|
||||
int reference,
|
||||
bint similarity_mode);
|
||||
|
65
python/obitools3/obidms/capi/obidms.cfiles
Normal file
65
python/obitools3/obidms/capi/obidms.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../../src/bloom.h
|
||||
../../../../src/bloom.c
|
||||
../../../../src/char_str_indexer.h
|
||||
../../../../src/char_str_indexer.c
|
||||
../../../../src/crc64.h
|
||||
../../../../src/crc64.c
|
||||
../../../../src/dna_seq_indexer.h
|
||||
../../../../src/dna_seq_indexer.c
|
||||
../../../../src/encode.h
|
||||
../../../../src/encode.c
|
||||
../../../../src/hashtable.h
|
||||
../../../../src/hashtable.c
|
||||
../../../../src/murmurhash2.h
|
||||
../../../../src/murmurhash2.c
|
||||
../../../../src/obi_align.h
|
||||
../../../../src/obi_align.c
|
||||
../../../../src/obiavl.h
|
||||
../../../../src/obiavl.c
|
||||
../../../../src/obiblob_indexer.h
|
||||
../../../../src/obiblob_indexer.c
|
||||
../../../../src/obiblob.h
|
||||
../../../../src/obiblob.c
|
||||
../../../../src/obidebug.h
|
||||
../../../../src/obidms_taxonomy.h
|
||||
../../../../src/obidms_taxonomy.c
|
||||
../../../../src/obidms.h
|
||||
../../../../src/obidms.c
|
||||
../../../../src/obidmscolumn_blob.c
|
||||
../../../../src/obidmscolumn_blob.h
|
||||
../../../../src/obidmscolumn_bool.c
|
||||
../../../../src/obidmscolumn_bool.h
|
||||
../../../../src/obidmscolumn_char.c
|
||||
../../../../src/obidmscolumn_char.h
|
||||
../../../../src/obidmscolumn_float.c
|
||||
../../../../src/obidmscolumn_float.h
|
||||
../../../../src/obidmscolumn_idx.h
|
||||
../../../../src/obidmscolumn_idx.c
|
||||
../../../../src/obidmscolumn_int.c
|
||||
../../../../src/obidmscolumn_int.h
|
||||
../../../../src/obidmscolumn_qual.h
|
||||
../../../../src/obidmscolumn_qual.c
|
||||
../../../../src/obidmscolumn_seq.c
|
||||
../../../../src/obidmscolumn_seq.h
|
||||
../../../../src/obidmscolumn_str.c
|
||||
../../../../src/obidmscolumn_str.h
|
||||
../../../../src/obidmscolumn.h
|
||||
../../../../src/obidmscolumn.c
|
||||
../../../../src/obidmscolumndir.h
|
||||
../../../../src/obidmscolumndir.c
|
||||
../../../../src/obierrno.h
|
||||
../../../../src/obierrno.c
|
||||
../../../../src/obilittlebigman.h
|
||||
../../../../src/obilittlebigman.c
|
||||
../../../../src/obitypes.h
|
||||
../../../../src/obitypes.c
|
||||
../../../../src/obiview.h
|
||||
../../../../src/obiview.c
|
||||
../../../../src/sse_banded_LCS_alignment.h
|
||||
../../../../src/sse_banded_LCS_alignment.c
|
||||
../../../../src/uint8_indexer.h
|
||||
../../../../src/uint8_indexer.c
|
||||
../../../../src/upperband.h
|
||||
../../../../src/upperband.c
|
||||
../../../../src/utils.h
|
||||
../../../../src/utils.c
|
65
python/obitools3/obidms/capi/obidmscolumn.cfiles
Normal file
65
python/obitools3/obidms/capi/obidmscolumn.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../../src/bloom.h
|
||||
../../../../src/bloom.c
|
||||
../../../../src/char_str_indexer.h
|
||||
../../../../src/char_str_indexer.c
|
||||
../../../../src/crc64.h
|
||||
../../../../src/crc64.c
|
||||
../../../../src/dna_seq_indexer.h
|
||||
../../../../src/dna_seq_indexer.c
|
||||
../../../../src/encode.h
|
||||
../../../../src/encode.c
|
||||
../../../../src/hashtable.h
|
||||
../../../../src/hashtable.c
|
||||
../../../../src/murmurhash2.h
|
||||
../../../../src/murmurhash2.c
|
||||
../../../../src/obi_align.h
|
||||
../../../../src/obi_align.c
|
||||
../../../../src/obiavl.h
|
||||
../../../../src/obiavl.c
|
||||
../../../../src/obiblob_indexer.h
|
||||
../../../../src/obiblob_indexer.c
|
||||
../../../../src/obiblob.h
|
||||
../../../../src/obiblob.c
|
||||
../../../../src/obidebug.h
|
||||
../../../../src/obidms_taxonomy.h
|
||||
../../../../src/obidms_taxonomy.c
|
||||
../../../../src/obidms.h
|
||||
../../../../src/obidms.c
|
||||
../../../../src/obidmscolumn_blob.c
|
||||
../../../../src/obidmscolumn_blob.h
|
||||
../../../../src/obidmscolumn_bool.c
|
||||
../../../../src/obidmscolumn_bool.h
|
||||
../../../../src/obidmscolumn_char.c
|
||||
../../../../src/obidmscolumn_char.h
|
||||
../../../../src/obidmscolumn_float.c
|
||||
../../../../src/obidmscolumn_float.h
|
||||
../../../../src/obidmscolumn_idx.h
|
||||
../../../../src/obidmscolumn_idx.c
|
||||
../../../../src/obidmscolumn_int.c
|
||||
../../../../src/obidmscolumn_int.h
|
||||
../../../../src/obidmscolumn_qual.h
|
||||
../../../../src/obidmscolumn_qual.c
|
||||
../../../../src/obidmscolumn_seq.c
|
||||
../../../../src/obidmscolumn_seq.h
|
||||
../../../../src/obidmscolumn_str.c
|
||||
../../../../src/obidmscolumn_str.h
|
||||
../../../../src/obidmscolumn.h
|
||||
../../../../src/obidmscolumn.c
|
||||
../../../../src/obidmscolumndir.h
|
||||
../../../../src/obidmscolumndir.c
|
||||
../../../../src/obierrno.h
|
||||
../../../../src/obierrno.c
|
||||
../../../../src/obilittlebigman.h
|
||||
../../../../src/obilittlebigman.c
|
||||
../../../../src/obitypes.h
|
||||
../../../../src/obitypes.c
|
||||
../../../../src/obiview.h
|
||||
../../../../src/obiview.c
|
||||
../../../../src/sse_banded_LCS_alignment.h
|
||||
../../../../src/sse_banded_LCS_alignment.c
|
||||
../../../../src/uint8_indexer.h
|
||||
../../../../src/uint8_indexer.c
|
||||
../../../../src/upperband.h
|
||||
../../../../src/upperband.c
|
||||
../../../../src/utils.h
|
||||
../../../../src/utils.c
|
@ -48,29 +48,6 @@ cdef extern from "obidmscolumn.h" nogil:
|
||||
|
||||
ctypedef OBIDMS_column_t* OBIDMS_column_p
|
||||
|
||||
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
const_char_p column_name,
|
||||
OBIType_t type,
|
||||
index_t nb_lines,
|
||||
index_t nb_elements_per_line,
|
||||
const_char_p elements_names,
|
||||
const_char_p indexer_name,
|
||||
const_char_p associated_colum_name,
|
||||
obiversion_t associated_colum_version,
|
||||
const_char_p comments)
|
||||
|
||||
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
const_char_p column_name,
|
||||
obiversion_t version_number)
|
||||
|
||||
int obi_close_column(OBIDMS_column_p column)
|
||||
|
||||
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
||||
OBIDMS_column_p line_selection,
|
||||
const_char_p column_name,
|
||||
obiversion_t version_number,
|
||||
bint clone_data)
|
||||
|
||||
int obi_close_column(OBIDMS_column_p column)
|
||||
|
||||
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms,
|
||||
@ -82,7 +59,7 @@ cdef extern from "obidmscolumn.h" nogil:
|
||||
|
||||
int obi_close_header(OBIDMS_column_header_p header)
|
||||
|
||||
int obi_select(OBIDMS_column_p line_selection_column, index_t line_to_grep)
|
||||
char* obi_get_elements_names(OBIDMS_column_p column)
|
||||
|
||||
|
||||
cdef extern from "obidmscolumn_int.h" nogil:
|
||||
|
65
python/obitools3/obidms/capi/obierrno.cfiles
Normal file
65
python/obitools3/obidms/capi/obierrno.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../../src/bloom.h
|
||||
../../../../src/bloom.c
|
||||
../../../../src/char_str_indexer.h
|
||||
../../../../src/char_str_indexer.c
|
||||
../../../../src/crc64.h
|
||||
../../../../src/crc64.c
|
||||
../../../../src/dna_seq_indexer.h
|
||||
../../../../src/dna_seq_indexer.c
|
||||
../../../../src/encode.h
|
||||
../../../../src/encode.c
|
||||
../../../../src/hashtable.h
|
||||
../../../../src/hashtable.c
|
||||
../../../../src/murmurhash2.h
|
||||
../../../../src/murmurhash2.c
|
||||
../../../../src/obi_align.h
|
||||
../../../../src/obi_align.c
|
||||
../../../../src/obiavl.h
|
||||
../../../../src/obiavl.c
|
||||
../../../../src/obiblob_indexer.h
|
||||
../../../../src/obiblob_indexer.c
|
||||
../../../../src/obiblob.h
|
||||
../../../../src/obiblob.c
|
||||
../../../../src/obidebug.h
|
||||
../../../../src/obidms_taxonomy.h
|
||||
../../../../src/obidms_taxonomy.c
|
||||
../../../../src/obidms.h
|
||||
../../../../src/obidms.c
|
||||
../../../../src/obidmscolumn_blob.c
|
||||
../../../../src/obidmscolumn_blob.h
|
||||
../../../../src/obidmscolumn_bool.c
|
||||
../../../../src/obidmscolumn_bool.h
|
||||
../../../../src/obidmscolumn_char.c
|
||||
../../../../src/obidmscolumn_char.h
|
||||
../../../../src/obidmscolumn_float.c
|
||||
../../../../src/obidmscolumn_float.h
|
||||
../../../../src/obidmscolumn_idx.h
|
||||
../../../../src/obidmscolumn_idx.c
|
||||
../../../../src/obidmscolumn_int.c
|
||||
../../../../src/obidmscolumn_int.h
|
||||
../../../../src/obidmscolumn_qual.h
|
||||
../../../../src/obidmscolumn_qual.c
|
||||
../../../../src/obidmscolumn_seq.c
|
||||
../../../../src/obidmscolumn_seq.h
|
||||
../../../../src/obidmscolumn_str.c
|
||||
../../../../src/obidmscolumn_str.h
|
||||
../../../../src/obidmscolumn.h
|
||||
../../../../src/obidmscolumn.c
|
||||
../../../../src/obidmscolumndir.h
|
||||
../../../../src/obidmscolumndir.c
|
||||
../../../../src/obierrno.h
|
||||
../../../../src/obierrno.c
|
||||
../../../../src/obilittlebigman.h
|
||||
../../../../src/obilittlebigman.c
|
||||
../../../../src/obitypes.h
|
||||
../../../../src/obitypes.c
|
||||
../../../../src/obiview.h
|
||||
../../../../src/obiview.c
|
||||
../../../../src/sse_banded_LCS_alignment.h
|
||||
../../../../src/sse_banded_LCS_alignment.c
|
||||
../../../../src/uint8_indexer.h
|
||||
../../../../src/uint8_indexer.c
|
||||
../../../../src/upperband.h
|
||||
../../../../src/upperband.c
|
||||
../../../../src/utils.h
|
||||
../../../../src/utils.c
|
65
python/obitools3/obidms/capi/obitaxonomy.cfiles
Normal file
65
python/obitools3/obidms/capi/obitaxonomy.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../../src/bloom.h
|
||||
../../../../src/bloom.c
|
||||
../../../../src/char_str_indexer.h
|
||||
../../../../src/char_str_indexer.c
|
||||
../../../../src/crc64.h
|
||||
../../../../src/crc64.c
|
||||
../../../../src/dna_seq_indexer.h
|
||||
../../../../src/dna_seq_indexer.c
|
||||
../../../../src/encode.h
|
||||
../../../../src/encode.c
|
||||
../../../../src/hashtable.h
|
||||
../../../../src/hashtable.c
|
||||
../../../../src/murmurhash2.h
|
||||
../../../../src/murmurhash2.c
|
||||
../../../../src/obi_align.h
|
||||
../../../../src/obi_align.c
|
||||
../../../../src/obiavl.h
|
||||
../../../../src/obiavl.c
|
||||
../../../../src/obiblob_indexer.h
|
||||
../../../../src/obiblob_indexer.c
|
||||
../../../../src/obiblob.h
|
||||
../../../../src/obiblob.c
|
||||
../../../../src/obidebug.h
|
||||
../../../../src/obidms_taxonomy.h
|
||||
../../../../src/obidms_taxonomy.c
|
||||
../../../../src/obidms.h
|
||||
../../../../src/obidms.c
|
||||
../../../../src/obidmscolumn_blob.c
|
||||
../../../../src/obidmscolumn_blob.h
|
||||
../../../../src/obidmscolumn_bool.c
|
||||
../../../../src/obidmscolumn_bool.h
|
||||
../../../../src/obidmscolumn_char.c
|
||||
../../../../src/obidmscolumn_char.h
|
||||
../../../../src/obidmscolumn_float.c
|
||||
../../../../src/obidmscolumn_float.h
|
||||
../../../../src/obidmscolumn_idx.h
|
||||
../../../../src/obidmscolumn_idx.c
|
||||
../../../../src/obidmscolumn_int.c
|
||||
../../../../src/obidmscolumn_int.h
|
||||
../../../../src/obidmscolumn_qual.h
|
||||
../../../../src/obidmscolumn_qual.c
|
||||
../../../../src/obidmscolumn_seq.c
|
||||
../../../../src/obidmscolumn_seq.h
|
||||
../../../../src/obidmscolumn_str.c
|
||||
../../../../src/obidmscolumn_str.h
|
||||
../../../../src/obidmscolumn.h
|
||||
../../../../src/obidmscolumn.c
|
||||
../../../../src/obidmscolumndir.h
|
||||
../../../../src/obidmscolumndir.c
|
||||
../../../../src/obierrno.h
|
||||
../../../../src/obierrno.c
|
||||
../../../../src/obilittlebigman.h
|
||||
../../../../src/obilittlebigman.c
|
||||
../../../../src/obitypes.h
|
||||
../../../../src/obitypes.c
|
||||
../../../../src/obiview.h
|
||||
../../../../src/obiview.c
|
||||
../../../../src/sse_banded_LCS_alignment.h
|
||||
../../../../src/sse_banded_LCS_alignment.c
|
||||
../../../../src/uint8_indexer.h
|
||||
../../../../src/uint8_indexer.c
|
||||
../../../../src/upperband.h
|
||||
../../../../src/upperband.c
|
||||
../../../../src/utils.h
|
||||
../../../../src/utils.c
|
@ -13,6 +13,7 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
int32_t farest
|
||||
ecotxnode* parent
|
||||
char* name
|
||||
char* preferred_name
|
||||
|
||||
ctypedef ecotxnode ecotx_t
|
||||
|
||||
@ -56,4 +57,9 @@ cdef extern from "obidms_taxonomy.h" nogil:
|
||||
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid)
|
||||
|
||||
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name)
|
||||
|
||||
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name)
|
||||
|
||||
|
65
python/obitools3/obidms/capi/obitypes.cfiles
Normal file
65
python/obitools3/obidms/capi/obitypes.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../../src/bloom.h
|
||||
../../../../src/bloom.c
|
||||
../../../../src/char_str_indexer.h
|
||||
../../../../src/char_str_indexer.c
|
||||
../../../../src/crc64.h
|
||||
../../../../src/crc64.c
|
||||
../../../../src/dna_seq_indexer.h
|
||||
../../../../src/dna_seq_indexer.c
|
||||
../../../../src/encode.h
|
||||
../../../../src/encode.c
|
||||
../../../../src/hashtable.h
|
||||
../../../../src/hashtable.c
|
||||
../../../../src/murmurhash2.h
|
||||
../../../../src/murmurhash2.c
|
||||
../../../../src/obi_align.h
|
||||
../../../../src/obi_align.c
|
||||
../../../../src/obiavl.h
|
||||
../../../../src/obiavl.c
|
||||
../../../../src/obiblob_indexer.h
|
||||
../../../../src/obiblob_indexer.c
|
||||
../../../../src/obiblob.h
|
||||
../../../../src/obiblob.c
|
||||
../../../../src/obidebug.h
|
||||
../../../../src/obidms_taxonomy.h
|
||||
../../../../src/obidms_taxonomy.c
|
||||
../../../../src/obidms.h
|
||||
../../../../src/obidms.c
|
||||
../../../../src/obidmscolumn_blob.c
|
||||
../../../../src/obidmscolumn_blob.h
|
||||
../../../../src/obidmscolumn_bool.c
|
||||
../../../../src/obidmscolumn_bool.h
|
||||
../../../../src/obidmscolumn_char.c
|
||||
../../../../src/obidmscolumn_char.h
|
||||
../../../../src/obidmscolumn_float.c
|
||||
../../../../src/obidmscolumn_float.h
|
||||
../../../../src/obidmscolumn_idx.h
|
||||
../../../../src/obidmscolumn_idx.c
|
||||
../../../../src/obidmscolumn_int.c
|
||||
../../../../src/obidmscolumn_int.h
|
||||
../../../../src/obidmscolumn_qual.h
|
||||
../../../../src/obidmscolumn_qual.c
|
||||
../../../../src/obidmscolumn_seq.c
|
||||
../../../../src/obidmscolumn_seq.h
|
||||
../../../../src/obidmscolumn_str.c
|
||||
../../../../src/obidmscolumn_str.h
|
||||
../../../../src/obidmscolumn.h
|
||||
../../../../src/obidmscolumn.c
|
||||
../../../../src/obidmscolumndir.h
|
||||
../../../../src/obidmscolumndir.c
|
||||
../../../../src/obierrno.h
|
||||
../../../../src/obierrno.c
|
||||
../../../../src/obilittlebigman.h
|
||||
../../../../src/obilittlebigman.c
|
||||
../../../../src/obitypes.h
|
||||
../../../../src/obitypes.c
|
||||
../../../../src/obiview.h
|
||||
../../../../src/obiview.c
|
||||
../../../../src/sse_banded_LCS_alignment.h
|
||||
../../../../src/sse_banded_LCS_alignment.c
|
||||
../../../../src/uint8_indexer.h
|
||||
../../../../src/uint8_indexer.c
|
||||
../../../../src/upperband.h
|
||||
../../../../src/upperband.c
|
||||
../../../../src/utils.h
|
||||
../../../../src/utils.c
|
65
python/obitools3/obidms/capi/obiutils.cfiles
Normal file
65
python/obitools3/obidms/capi/obiutils.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../../src/bloom.h
|
||||
../../../../src/bloom.c
|
||||
../../../../src/char_str_indexer.h
|
||||
../../../../src/char_str_indexer.c
|
||||
../../../../src/crc64.h
|
||||
../../../../src/crc64.c
|
||||
../../../../src/dna_seq_indexer.h
|
||||
../../../../src/dna_seq_indexer.c
|
||||
../../../../src/encode.h
|
||||
../../../../src/encode.c
|
||||
../../../../src/hashtable.h
|
||||
../../../../src/hashtable.c
|
||||
../../../../src/murmurhash2.h
|
||||
../../../../src/murmurhash2.c
|
||||
../../../../src/obi_align.h
|
||||
../../../../src/obi_align.c
|
||||
../../../../src/obiavl.h
|
||||
../../../../src/obiavl.c
|
||||
../../../../src/obiblob_indexer.h
|
||||
../../../../src/obiblob_indexer.c
|
||||
../../../../src/obiblob.h
|
||||
../../../../src/obiblob.c
|
||||
../../../../src/obidebug.h
|
||||
../../../../src/obidms_taxonomy.h
|
||||
../../../../src/obidms_taxonomy.c
|
||||
../../../../src/obidms.h
|
||||
../../../../src/obidms.c
|
||||
../../../../src/obidmscolumn_blob.c
|
||||
../../../../src/obidmscolumn_blob.h
|
||||
../../../../src/obidmscolumn_bool.c
|
||||
../../../../src/obidmscolumn_bool.h
|
||||
../../../../src/obidmscolumn_char.c
|
||||
../../../../src/obidmscolumn_char.h
|
||||
../../../../src/obidmscolumn_float.c
|
||||
../../../../src/obidmscolumn_float.h
|
||||
../../../../src/obidmscolumn_idx.h
|
||||
../../../../src/obidmscolumn_idx.c
|
||||
../../../../src/obidmscolumn_int.c
|
||||
../../../../src/obidmscolumn_int.h
|
||||
../../../../src/obidmscolumn_qual.h
|
||||
../../../../src/obidmscolumn_qual.c
|
||||
../../../../src/obidmscolumn_seq.c
|
||||
../../../../src/obidmscolumn_seq.h
|
||||
../../../../src/obidmscolumn_str.c
|
||||
../../../../src/obidmscolumn_str.h
|
||||
../../../../src/obidmscolumn.h
|
||||
../../../../src/obidmscolumn.c
|
||||
../../../../src/obidmscolumndir.h
|
||||
../../../../src/obidmscolumndir.c
|
||||
../../../../src/obierrno.h
|
||||
../../../../src/obierrno.c
|
||||
../../../../src/obilittlebigman.h
|
||||
../../../../src/obilittlebigman.c
|
||||
../../../../src/obitypes.h
|
||||
../../../../src/obitypes.c
|
||||
../../../../src/obiview.h
|
||||
../../../../src/obiview.c
|
||||
../../../../src/sse_banded_LCS_alignment.h
|
||||
../../../../src/sse_banded_LCS_alignment.c
|
||||
../../../../src/uint8_indexer.h
|
||||
../../../../src/uint8_indexer.c
|
||||
../../../../src/upperband.h
|
||||
../../../../src/upperband.c
|
||||
../../../../src/utils.h
|
||||
../../../../src/utils.c
|
65
python/obitools3/obidms/capi/obiview.cfiles
Normal file
65
python/obitools3/obidms/capi/obiview.cfiles
Normal file
@ -0,0 +1,65 @@
|
||||
../../../../src/bloom.h
|
||||
../../../../src/bloom.c
|
||||
../../../../src/char_str_indexer.h
|
||||
../../../../src/char_str_indexer.c
|
||||
../../../../src/crc64.h
|
||||
../../../../src/crc64.c
|
||||
../../../../src/dna_seq_indexer.h
|
||||
../../../../src/dna_seq_indexer.c
|
||||
../../../../src/encode.h
|
||||
../../../../src/encode.c
|
||||
../../../../src/hashtable.h
|
||||
../../../../src/hashtable.c
|
||||
../../../../src/murmurhash2.h
|
||||
../../../../src/murmurhash2.c
|
||||
../../../../src/obi_align.h
|
||||
../../../../src/obi_align.c
|
||||
../../../../src/obiavl.h
|
||||
../../../../src/obiavl.c
|
||||
../../../../src/obiblob_indexer.h
|
||||
../../../../src/obiblob_indexer.c
|
||||
../../../../src/obiblob.h
|
||||
../../../../src/obiblob.c
|
||||
../../../../src/obidebug.h
|
||||
../../../../src/obidms_taxonomy.h
|
||||
../../../../src/obidms_taxonomy.c
|
||||
../../../../src/obidms.h
|
||||
../../../../src/obidms.c
|
||||
../../../../src/obidmscolumn_blob.c
|
||||
../../../../src/obidmscolumn_blob.h
|
||||
../../../../src/obidmscolumn_bool.c
|
||||
../../../../src/obidmscolumn_bool.h
|
||||
../../../../src/obidmscolumn_char.c
|
||||
../../../../src/obidmscolumn_char.h
|
||||
../../../../src/obidmscolumn_float.c
|
||||
../../../../src/obidmscolumn_float.h
|
||||
../../../../src/obidmscolumn_idx.h
|
||||
../../../../src/obidmscolumn_idx.c
|
||||
../../../../src/obidmscolumn_int.c
|
||||
../../../../src/obidmscolumn_int.h
|
||||
../../../../src/obidmscolumn_qual.h
|
||||
../../../../src/obidmscolumn_qual.c
|
||||
../../../../src/obidmscolumn_seq.c
|
||||
../../../../src/obidmscolumn_seq.h
|
||||
../../../../src/obidmscolumn_str.c
|
||||
../../../../src/obidmscolumn_str.h
|
||||
../../../../src/obidmscolumn.h
|
||||
../../../../src/obidmscolumn.c
|
||||
../../../../src/obidmscolumndir.h
|
||||
../../../../src/obidmscolumndir.c
|
||||
../../../../src/obierrno.h
|
||||
../../../../src/obierrno.c
|
||||
../../../../src/obilittlebigman.h
|
||||
../../../../src/obilittlebigman.c
|
||||
../../../../src/obitypes.h
|
||||
../../../../src/obitypes.c
|
||||
../../../../src/obiview.h
|
||||
../../../../src/obiview.c
|
||||
../../../../src/sse_banded_LCS_alignment.h
|
||||
../../../../src/sse_banded_LCS_alignment.c
|
||||
../../../../src/uint8_indexer.h
|
||||
../../../../src/uint8_indexer.c
|
||||
../../../../src/upperband.h
|
||||
../../../../src/upperband.c
|
||||
../../../../src/utils.h
|
||||
../../../../src/utils.c
|
@ -68,7 +68,7 @@ cdef extern from "obiview.h" nogil:
|
||||
|
||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column)
|
||||
|
||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
|
||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bint finished)
|
||||
|
||||
int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
|
||||
|
||||
@ -96,10 +96,6 @@ cdef extern from "obiview.h" nogil:
|
||||
|
||||
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
|
||||
|
||||
int obi_save_view(Obiview_p view)
|
||||
|
||||
int obi_close_view(Obiview_p view)
|
||||
|
||||
int obi_save_and_close_view(Obiview_p view)
|
||||
|
||||
|
||||
|
1028
src/obi_align.c
1028
src/obi_align.c
File diff suppressed because it is too large
Load Diff
@ -1,12 +1,12 @@
|
||||
/****************************************************************************
|
||||
* Sequence alignment functions header file *
|
||||
* LCS sequence alignment functions header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obi_align.h
|
||||
* @author Celine Mercier
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date May 11th 2016
|
||||
* @brief Header file for the functions handling the alignment of DNA sequences.
|
||||
* @brief Header file for the functions handling the LCS alignment of DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
@ -55,7 +55,7 @@
|
||||
|
||||
|
||||
/**
|
||||
* @brief Aligns a NUC_SEQ column with itself.
|
||||
* @brief Aligns an OBI_SEQ column with itself.
|
||||
*
|
||||
* Note: The columns where the results are written are automatically named and created.
|
||||
*
|
||||
@ -77,7 +77,7 @@
|
||||
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||
* @param reference The reference length. 0: The alignement length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
||||
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
||||
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
@ -92,18 +92,64 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
|
||||
const char* id_column_name,
|
||||
const char* output_view_name, const char* output_view_comments,
|
||||
bool print_seq, bool print_count,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode);
|
||||
double threshold, bool normalize, int reference, bool similarity_mode,
|
||||
int thread_count);
|
||||
|
||||
|
||||
/**
|
||||
* @brief
|
||||
* @brief Aligns two OBI_SEQ columns.
|
||||
*
|
||||
* TODO
|
||||
* The columns must belong to the same OBIDMS, but can belong to different views.
|
||||
*
|
||||
* Note: The columns where the results are written are automatically named and created.
|
||||
*
|
||||
* @param dms A pointer on an OBIDMS.
|
||||
* @param seq1_view_name The name of the view where the first column to align is.
|
||||
* @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one).
|
||||
* @param seq1_column_name The name of the first OBI_SEQ column in the input view to align.
|
||||
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
|
||||
* @param seq2_column_name The name of the second OBI_SEQ column in the input view to align.
|
||||
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
|
||||
* @param seq1_elt_name The name of the element in the first column corresponding to the sequence to align, if the column has multiple
|
||||
* elements per line.
|
||||
* @param seq2_elt_name The name of the element in the second column corresponding to the sequence to align, if the column has multiple
|
||||
* elements per line.
|
||||
* @param id1_column_name The name of the column in the first input view containing the identifiers of the first sequence to align.
|
||||
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
|
||||
* @param id2_column_name The name of the column in the second input view containing the identifiers of the second sequence to align.
|
||||
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
|
||||
* @param output_view_name The name of the output view where the results should be written (should not already exist).
|
||||
* @param output_view_comments The comments that should be associated with the output view.
|
||||
* @param print_seq A boolean indicating whether the aligned sequences should be copied in the output view.
|
||||
* @param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view.
|
||||
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
||||
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,
|
||||
// Obiview_p score_view, OBIDMS_column_p score_column,
|
||||
// double threshold, bool normalize, int reference, bool similarity_mode);
|
||||
int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
const char* seq1_view_name,
|
||||
const char* seq2_view_name,
|
||||
const char* seq1_column_name,
|
||||
const char* seq2_column_name,
|
||||
const char* seq1_elt_name,
|
||||
const char* seq2_elt_name,
|
||||
const char* id1_column_name,
|
||||
const char* id2_column_name,
|
||||
const char* output_view_name, const char* output_view_comments,
|
||||
bool print_seq, bool print_count,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode);
|
||||
|
||||
|
||||
#endif /* OBI_ALIGN_H_ */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
||||
* @file obidms_taxonomy.h
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date March 2nd 2016
|
||||
* @brief Header file for the functions handling the reading of binary taxonomy files.
|
||||
* @brief Header file for the functions handling the reading and writing of taxonomy files.
|
||||
*/
|
||||
|
||||
|
||||
@ -17,105 +17,384 @@
|
||||
#include "obidms.h"
|
||||
|
||||
|
||||
#define MIN_LOCAL_TAXID (10000000)
|
||||
#define TAX_NAME_LEN (1024)
|
||||
#define MIN_LOCAL_TAXID (10000000) /**< The minimum taxid for a taxon added locally (i.e. not an NCBI taxon).
|
||||
*/
|
||||
#define TAX_NAME_LEN (1024) /**< The maximum length for the taxonomy name.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxon as stored in a .tdx file.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
int32_t rank;
|
||||
int32_t parent;
|
||||
int32_t name_length;
|
||||
char name[1];
|
||||
int32_t taxid; /**< Taxid.
|
||||
*/
|
||||
int32_t rank; /**< Rank index.
|
||||
*/
|
||||
int32_t parent; /**< Index, in the taxid index, of the parent node in the taxonomic tree.
|
||||
*/
|
||||
int32_t name_length; /**< Length of the taxon scientific name.
|
||||
*/
|
||||
char name[]; /**< Scientific name of the taxon.
|
||||
*/
|
||||
} ecotxformat_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxon as stored in a taxonomy structure.
|
||||
*/
|
||||
typedef struct ecotxnode {
|
||||
int32_t taxid;
|
||||
int32_t rank;
|
||||
int32_t farest;
|
||||
int32_t idx;
|
||||
struct ecotxnode* parent;
|
||||
char* name;
|
||||
bool local;
|
||||
int32_t taxid; /**< Taxid. // TODO discuss that this will be the current taxid even if the struct was accessed through a deprecated one
|
||||
*/
|
||||
int32_t rank; /**< Rank index in ecorankidx_t structure.
|
||||
*/
|
||||
int32_t farest; /**< Longest branch length, used to compute distances between taxa faster.
|
||||
*/
|
||||
int32_t idx; /**< Index in the ecotxidx_t structure.
|
||||
*/
|
||||
struct ecotxnode* parent; /**< Pointer on the parent node in the taxonomic tree.
|
||||
*/
|
||||
char* name; /**< Scientific name of the taxon.
|
||||
*/
|
||||
char* preferred_name; /**< Preferred name of the taxon if there is one, otherwise NULL.
|
||||
*/
|
||||
bool local; /**< A boolean indicating whether the taxon is local or not.
|
||||
*/
|
||||
} ecotx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for the taxon index in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
int32_t ncbi_count;
|
||||
int32_t local_count;
|
||||
int32_t max_taxid;
|
||||
int32_t buffer_size;
|
||||
ecotx_t taxon[1];
|
||||
int32_t count; /**< Number of taxa.
|
||||
*/
|
||||
int32_t ncbi_count; /**< Number of NCBI taxa.
|
||||
*/
|
||||
int32_t local_count; /**< Number of taxa added locally.
|
||||
*/
|
||||
int32_t max_taxid; /**< Maximum taxid existing in the taxon index.
|
||||
*/
|
||||
int32_t buffer_size; /**< Number of taxa. // TODO kept this but not sure of its use
|
||||
*/
|
||||
ecotx_t taxon[]; /**< Taxon array.
|
||||
*/
|
||||
} ecotxidx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for the rank index in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
char* label[1];
|
||||
int32_t count; /**< Number of ranks.
|
||||
*/
|
||||
char* label[]; /**< Array of rank names.
|
||||
*/
|
||||
} ecorankidx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxon name as stored in a .ndx file.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t is_scientific_name;
|
||||
int32_t name_length;
|
||||
int32_t class_length;
|
||||
int32_t taxid; // taxid idx
|
||||
char names[1];
|
||||
int32_t is_scientific_name; /**< A boolean indicating whether the name is a scientific name or not.
|
||||
*/
|
||||
int32_t name_length; /**< The name length.
|
||||
*/
|
||||
int32_t class_length; /**< The name class length.
|
||||
*/
|
||||
int32_t taxid; /**< Index of the taxon in the taxid index.
|
||||
*/
|
||||
char names[]; /**< Taxon name and name class concatenated.
|
||||
*/
|
||||
} econameformat_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxon name as stored in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
char* name;
|
||||
char* class_name;
|
||||
int32_t is_scientific_name;
|
||||
struct ecotxnode* taxon;
|
||||
char* name; /**< Taxon name.
|
||||
*/
|
||||
char* class_name; /**< Name class.
|
||||
*/
|
||||
int32_t is_scientific_name; /**< A boolean indicating whether the name is a scientific name or not.
|
||||
*/
|
||||
struct ecotxnode* taxon; /**< Pointer on the taxon in the taxon index.
|
||||
*/
|
||||
} econame_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for the name index in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
econame_t names[1];
|
||||
int32_t count; /**< Number of names.
|
||||
*/
|
||||
econame_t names[]; /**< Array of names.
|
||||
*/
|
||||
} econameidx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxid/index pair as stored in a taxonomy structure.
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t taxid; /**< Taxid.
|
||||
*/
|
||||
int32_t idx; /**< Index of the taxid in the taxon index, -1 if the taxid is deprecated.
|
||||
*/
|
||||
} ecomerged_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a merged taxid index in a taxonomy structure.
|
||||
*
|
||||
* This index includes all deprecated taxids that now refer to different taxids, and
|
||||
* the deprecated taxids that are deleted.
|
||||
*
|
||||
*/
|
||||
typedef struct {
|
||||
int32_t count; /**< Number of taxid/index pairs.
|
||||
*/
|
||||
ecomerged_t merged[]; /**< Array of taxid/index pairs.
|
||||
*/
|
||||
} ecomergedidx_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Structure for a taxonomy.
|
||||
*/
|
||||
typedef struct OBIDMS_taxonomy_t {
|
||||
char tax_name[TAX_NAME_LEN];
|
||||
OBIDMS_p dms;
|
||||
ecorankidx_t* ranks;
|
||||
econameidx_t* names;
|
||||
ecotxidx_t* taxa;
|
||||
char tax_name[TAX_NAME_LEN]; /**< Taxonomy name.
|
||||
*/
|
||||
OBIDMS_p dms; /**< A pointer on the DMS to which the taxonomy belongs.
|
||||
*/
|
||||
ecomergedidx_t* merged_idx; /**< Merged taxid index.
|
||||
*/
|
||||
ecorankidx_t* ranks; /**< Taxonomic ranks.
|
||||
*/
|
||||
econameidx_t* names; /**< Taxon names.
|
||||
*/
|
||||
econameidx_t* preferred_names; /**< Taxon preferred names (i.e. added locally).
|
||||
*/
|
||||
ecotxidx_t* taxa; /**< Taxa.
|
||||
*/
|
||||
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
||||
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
|
||||
|
||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
|
||||
|
||||
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
||||
|
||||
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
|
||||
|
||||
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
int write_rankidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
|
||||
int write_taxonomyidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
|
||||
int write_nameidx(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* taxonomy_name);
|
||||
|
||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
||||
|
||||
/**
|
||||
* @brief Function reading an NCBI taxdump and loading its information into a taxonomy structure.
|
||||
*
|
||||
* @param taxdump The path to the taxdump directory.
|
||||
*
|
||||
* @returns A pointer on the read taxonomy structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
|
||||
|
||||
int obi_taxonomy_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
||||
|
||||
/**
|
||||
* @brief Function reading a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files)
|
||||
* and loading its information into a taxonomy structure.
|
||||
*
|
||||
* @param dms A pointer on the DMS to which the taxonomy belongs.
|
||||
* @param taxonomy_name The name (prefix) of the taxonomy.
|
||||
* @param read_alternative_names A boolean indicating whether names other than scientific and preferred names should be read.
|
||||
*
|
||||
* @returns A pointer on the read taxonomy structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function writing a binary taxonomy database (i.e. a set of .tdx, .ndx, .rdx, .adx, .ldx, .pdx files).
|
||||
*
|
||||
* @param dms A pointer on the DMS to which the taxonomy belongs.
|
||||
* @param tax A pointer on the taxonomy structure.
|
||||
* @param tax_name The name (prefix) of the taxonomy.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function closing a taxonomy structure.
|
||||
*
|
||||
* This function writes all changes to the binary files (local taxa and preferred names) and frees all allocated memory for the structure.
|
||||
*
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function adding a local taxon to a taxonomy.
|
||||
*
|
||||
* @param tax A pointer on the taxonomy structure.
|
||||
* @param name The taxon scientific name.
|
||||
* @param rank_name The taxon rank name.
|
||||
* @param parent_taxid The taxid of the parent node in the taxonomic tree.
|
||||
* @param min_taxid The minimum taxid to give to the new taxon (the function will choose a new taxid >= min_taxid and >= MIN_LOCAL_TAXID).
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function adding a preferred name to a taxon in a taxonomy, referred to by its taxid.
|
||||
*
|
||||
* @param tax A pointer on the taxonomy structure.
|
||||
* @param taxid The taxid of the taxon that should have a new preferred name.
|
||||
* @param preferred_name The new preferred name.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax, int32_t taxid, const char* preferred_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function adding a preferred name to a taxon in a taxonomy, referred to by the taxon pointer.
|
||||
*
|
||||
* @param tax A pointer on the taxonomy structure.
|
||||
* @param taxon A pointer on the taxon that should have a new preferred name.
|
||||
* @param preferred_name The new preferred name.
|
||||
*
|
||||
* @returns An integer value indicating the success of the operation.
|
||||
* @retval 0 on success.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax, ecotx_t* taxon, const char* preferred_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at a given rank.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param rankidx The index of the rank wanted.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the wanted rank.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning a taxon given its taxid.
|
||||
*
|
||||
* @param taxonomy A pointer on the taxonomy.
|
||||
* @param taxid The taxid of the taxon.
|
||||
*
|
||||
* @returns A pointer on the wanted taxon.
|
||||
* @retval NULL if no taxon was found with the given taxid.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function checking whether a taxon is under another in the taxonomy tree.
|
||||
*
|
||||
* @param taxon A pointer on the first taxon.
|
||||
* @param other_taxid The taxid of the second taxon.
|
||||
*
|
||||
* @returns A boolean indicating whether the first taxon is under the second taxon in the taxonomy tree.
|
||||
*/
|
||||
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the species level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the species level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the genus level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the genus level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the family level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the family level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the kingdom level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the kingdom level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function returning the parent of a taxon at the superkingdom level.
|
||||
*
|
||||
* @param taxon A pointer on the taxon.
|
||||
* @param taxonomy A pointer on the taxonomy structure.
|
||||
*
|
||||
* @returns A pointer on the parent taxon at the superkingdom level.
|
||||
* @retval NULL if no parent taxon was found at the wanted rank.
|
||||
*/
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
|
@ -119,7 +119,7 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
|
||||
|
||||
/**
|
||||
* @brief Internal function building the default elements names of the lines of a
|
||||
* column (i.e. "0;1;2;...;n").
|
||||
* column, with ';' as separator (i.e. "0;1;2;...;n\0").
|
||||
*
|
||||
* @warning The returned pointer has to be freed by the caller.
|
||||
*
|
||||
@ -134,12 +134,61 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
|
||||
static char* build_default_elements_names(index_t nb_elements_per_line);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function formatting the elements names of the lines of a
|
||||
* column with '\0' as separator (e.g. "0\01\02\0...\0n\0").
|
||||
*
|
||||
* @param elements_names The character string formatted with ';' as separator (e.g. "0;1;2;...;n\0").
|
||||
* @param elts_names_length A pointer on an integer where the function will store the length of the character string.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static void format_elements_names(char* elements_names, int* elts_names_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function comparing two element names using their sorted index, using data stored in the column header.
|
||||
*
|
||||
* @param n1_sort_idx A pointer on the sorted index of the first name.
|
||||
* @param n2_sort_idx A pointer on the sorted index of the second name.
|
||||
* @param h A pointer on the column header.
|
||||
*
|
||||
* @returns A value < 0 if name1 < name2,
|
||||
* a value > 0 if name1 > name2,
|
||||
* and 0 if name1 == name2.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function comparing two element names using a pointer on the first name and the sorted index of the second name,
|
||||
* using data stored in the column header.
|
||||
*
|
||||
* @param name1 A pointer on the first name.
|
||||
* @param n2_sort_idx A pointer on the sorted index of the second name.
|
||||
* @param h A pointer on the column header.
|
||||
*
|
||||
* @returns A value < 0 if name1 < name2,
|
||||
* a value > 0 if name1 > name2,
|
||||
* and 0 if name1 == name2.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function setting the elements names of the lines of a
|
||||
* column in the header of the OBIDMS column structure.
|
||||
*
|
||||
* @param column A pointer as returned by obi_create_column().
|
||||
* @param elements_names The names of the elements with ';' as separator.
|
||||
* @param elements_names The names of the elements as formatted by format_elements_names().
|
||||
* @param elts_names_length The length of elements_names.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
@ -147,7 +196,35 @@ static char* build_default_elements_names(index_t nb_elements_per_line);
|
||||
* @since July 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names);
|
||||
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function counting the number of elements names in a character array.
|
||||
*
|
||||
* @param elements_names A pointer on the character string corresponding to the elements names,
|
||||
* formatted with ';' or with '\0' as separator.
|
||||
* @param elt_names_formatted Whether the separator is ';' (false), or '\0' (true, as formatted by format_elements_names()).
|
||||
*
|
||||
* @returns The number of elements names in the character array.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function computing the length of a character array containing elements names as formatted by format_elements_names().
|
||||
*
|
||||
* @param elements_names A pointer on the character string corresponding to the elements names as formatted by format_elements_names().
|
||||
*
|
||||
* @returns The length of a character array.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int get_formatted_elt_names_length(const char* elements_names);
|
||||
|
||||
|
||||
/**
|
||||
@ -198,6 +275,7 @@ static char* build_column_file_name(const char* column_name, obiversion_t versio
|
||||
}
|
||||
|
||||
|
||||
|
||||
static char* build_version_file_name(const char* column_name)
|
||||
{
|
||||
char* file_name;
|
||||
@ -222,6 +300,7 @@ static char* build_version_file_name(const char* column_name)
|
||||
}
|
||||
|
||||
|
||||
|
||||
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block)
|
||||
{
|
||||
off_t loc_size;
|
||||
@ -346,6 +425,7 @@ static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_
|
||||
}
|
||||
|
||||
|
||||
|
||||
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory)
|
||||
{
|
||||
off_t loc_size;
|
||||
@ -437,10 +517,12 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
|
||||
}
|
||||
|
||||
|
||||
|
||||
static char* build_default_elements_names(index_t nb_elements_per_line)
|
||||
{
|
||||
char* elements_names;
|
||||
int i;
|
||||
int len;
|
||||
|
||||
elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
|
||||
if (elements_names == NULL)
|
||||
@ -457,31 +539,169 @@ static char* build_default_elements_names(index_t nb_elements_per_line)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i= 0; i < nb_elements_per_line; i++)
|
||||
sprintf(elements_names, "%d", i);
|
||||
len = 0;
|
||||
for (i = 0; i < nb_elements_per_line; i++)
|
||||
len += sprintf(elements_names+len, "%d;", i);
|
||||
|
||||
// Terminal character
|
||||
elements_names[strlen(elements_names)] = '\0';
|
||||
elements_names[len-1] = '\0'; // -1 to delete last ';'
|
||||
len--;
|
||||
|
||||
return elements_names;
|
||||
}
|
||||
|
||||
|
||||
int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names)
|
||||
|
||||
/*
 * Formats in place a list of elements names: every ';' separator is replaced
 * by '\0', turning "0;1;2" into "0\0001\0002\0" (each name becomes its own
 * C string). The length of the original string (terminal '\0' excluded) is
 * stored in *elts_names_length, since strlen() can not be used on the
 * formatted array afterwards.
 *
 * The buffer is modified: elements_names must point to writable memory.
 */
static void format_elements_names(char* elements_names, int* elts_names_length)
{
	int i;

	// Measure the string before inserting '\0' characters in it
	*elts_names_length = strlen(elements_names);

	// Replace the ';' with '\0'
	for (i=0; i < *elts_names_length; i++)
	{
		if (elements_names[i] == ';')
			elements_names[i] = '\0';
	}
}
|
||||
|
||||
|
||||
|
||||
static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h)
|
||||
{
|
||||
char* name1=NULL;
|
||||
char* name2=NULL;
|
||||
|
||||
int name1_idx;
|
||||
int name2_idx;
|
||||
|
||||
int name1_sort_idx = *((int*)n1_sort_idx);
|
||||
int name2_sort_idx = *((int*)n2_sort_idx);
|
||||
OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
|
||||
|
||||
name1_idx = (header->elements_names_idx)[name1_sort_idx];
|
||||
name1 = (header->elements_names)+name1_idx;
|
||||
|
||||
name2_idx = (header->elements_names_idx)[name2_sort_idx];
|
||||
name2 = (header->elements_names)+name2_idx;
|
||||
|
||||
return strcmp(name1, name2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h)
|
||||
{
|
||||
char* name2=NULL;
|
||||
int name2_idx;
|
||||
|
||||
int name2_sort_idx = *((int*)n2_sort_idx);
|
||||
OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
|
||||
|
||||
name2_idx = (header->elements_names_idx)[name2_sort_idx];
|
||||
name2 = (header->elements_names)+name2_idx;
|
||||
|
||||
return strcmp(name1, name2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Stores formatted elements names ('\0' as separator) in the header of a
 * column and builds the two associated indices:
 *   - elements_names_idx:  start offset of each name in header->elements_names,
 *   - sorted_elements_idx: positions in elements_names_idx, sorted by name.
 *
 * Returns 0 on success, -1 if the names do not fit in the header (character
 * array too long, or more names than the index arrays can hold). Nothing is
 * written in the header when -1 is returned.
 */
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length)
{
	int i;
	int nb_names;
	int name_rank;

	// Check that the elements names are not too long
	// (2 extra characters are needed for the terminal "\0\0" flag)
	if (elts_names_length+2 > ELEMENTS_NAMES_MAX)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX);
		return -1;
	}

	// Count the names before touching the header: the first name starts at 0,
	// and each '\0' found from position 1 on starts a new one
	nb_names = 1;
	for (i = 1; i < elts_names_length; i++)
	{
		if (elements_names[i] == '\0')
			nb_names++;
	}

	// The index arrays of the header have a fixed size: refuse to overflow them
	if (nb_names > NB_ELTS_MAX_IF_DEFAULT_NAME)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError: too many element names (max: %d)", NB_ELTS_MAX_IF_DEFAULT_NAME);
		return -1;
	}

	// Copy the elements names in the header
	// (memcpy and not strcpy because the array contains '\0' separators)
	memcpy((column->header)->elements_names, elements_names, elts_names_length*sizeof(char));

	// Terminal characters
	(column->header)->elements_names[elts_names_length] = '\0';
	(column->header)->elements_names[elts_names_length + 1] = '\0';

	// Store the length of the character array containing the elements names
	(column->header)->elements_names_length = elts_names_length;

	// Build the elements names index, starting with the first element name
	((column->header)->elements_names_idx)[0] = 0;
	((column->header)->sorted_elements_idx)[0] = 0;
	name_rank = 1;
	for (i = 1; i < elts_names_length; i++)
	{
		if (elements_names[i] == '\0')
		{	// Index new element name, which starts right after the separator
			((column->header)->elements_names_idx)[name_rank] = i+1;
			((column->header)->sorted_elements_idx)[name_rank] = name_rank;
			name_rank++;
		}
	}

	// Build the sorted index
	qsort_user_data((column->header)->sorted_elements_idx, name_rank, sizeof(int), column->header, cmp_elements_names_with_idx);

	return 0;
}
|
||||
|
||||
|
||||
index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
|
||||
|
||||
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted)
|
||||
{
|
||||
char sep;
|
||||
int i = 0;
|
||||
bool stop = false;
|
||||
index_t count = 0;
|
||||
|
||||
if (elt_names_formatted)
|
||||
sep = FORMATTED_ELT_NAMES_SEPARATOR;
|
||||
else
|
||||
sep = NOT_FORMATTED_ELT_NAMES_SEPARATOR;
|
||||
|
||||
while (! stop)
|
||||
{
|
||||
if ((elt_names_formatted && (elements_names[i] == '\0') && (elements_names[i+1] == '\0')) ||
|
||||
((! elt_names_formatted) && (elements_names[i] == '\0')))
|
||||
stop = true;
|
||||
if ((elements_names[i] == sep) || (elements_names[i] == '\0'))
|
||||
count++;
|
||||
i++;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Computes the length of a formatted elements names array, i.e. the position
 * of the first '\0' that is directly followed by another '\0' (the "\0\0"
 * terminal flag). The '\0' separators found before that are counted in the
 * length.
 */
static int get_formatted_elt_names_length(const char* elements_names)
{
	int length;

	// Move forward until both the current and the next characters are '\0'
	for (length = 0; (elements_names[length] != '\0') || (elements_names[length+1] != '\0'); length++)
		;

	return length;
}
|
||||
|
||||
|
||||
|
||||
/* Internal function computing how many lines of a column fit in one memory page:
 * the page size given by getpagesize() divided by the size of one line
 * (obi_sizeof(data_type) * nb_elements_per_line).
 * NOTE(review): nb_elements_per_line == 0 would divide by zero - confirm that callers always pass at least 1.
 */
static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
|
||||
{
|
||||
return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line);
|
||||
}
|
||||
@ -493,6 +713,7 @@ index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_lin
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory)
|
||||
{
|
||||
off_t loc_size;
|
||||
@ -557,6 +778,7 @@ obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_dire
|
||||
}
|
||||
|
||||
|
||||
|
||||
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name)
|
||||
{
|
||||
OBIDMS_column_directory_p column_directory;
|
||||
@ -582,6 +804,7 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c
|
||||
}
|
||||
|
||||
|
||||
|
||||
size_t obi_get_platform_header_size()
|
||||
{
|
||||
size_t header_size;
|
||||
@ -607,7 +830,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
const char* indexer_name,
|
||||
const char* associated_column_name,
|
||||
obiversion_t associated_column_version,
|
||||
const char* comments
|
||||
const char* comments,
|
||||
bool elt_names_formatted
|
||||
)
|
||||
{
|
||||
OBIDMS_column_p new_column;
|
||||
@ -623,6 +847,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
OBIType_t returned_data_type;
|
||||
OBIType_t stored_data_type;
|
||||
char* final_indexer_name;
|
||||
char* built_elements_names = NULL;
|
||||
int elts_names_length;
|
||||
|
||||
new_column = NULL;
|
||||
|
||||
@ -695,31 +921,29 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
else if (nb_lines < minimum_line_count)
|
||||
nb_lines = minimum_line_count;
|
||||
|
||||
// Check and build if needed the element names
|
||||
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0)) // Build the default element names: str of the element index
|
||||
// Check, format, and build if needed the element names
|
||||
if ((elements_names == NULL) || (*elements_names == '\0')) // Build the default element names: str of the element index
|
||||
{
|
||||
elements_names = build_default_elements_names(nb_elements_per_line);
|
||||
if (elements_names == NULL)
|
||||
built_elements_names = build_default_elements_names(nb_elements_per_line);
|
||||
if (built_elements_names == NULL)
|
||||
return NULL;
|
||||
elements_names = built_elements_names;
|
||||
}
|
||||
else if (((elements_names == NULL) || (strcmp(elements_names, "") != 0)) && (nb_elements_per_line > 1))
|
||||
else
|
||||
{ // The number of elements names should be equal to the number of elements per line
|
||||
char* token;
|
||||
index_t n = 0;
|
||||
token = strdup(elements_names);
|
||||
token = strtok(token, ";");
|
||||
while (token != NULL)
|
||||
if (check_elt_names_count(elements_names, elt_names_formatted) != nb_elements_per_line)
|
||||
{
|
||||
token = strtok(NULL, ";");
|
||||
n++;
|
||||
}
|
||||
if (n != nb_elements_per_line)
|
||||
{
|
||||
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
|
||||
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line:"
|
||||
"\n%lld elements per line\nelements names:%s\n", nb_elements_per_line, elements_names);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
// TODO what if 1 element and name specified? doc
|
||||
|
||||
// Format the elements names string
|
||||
if (! elt_names_formatted)
|
||||
format_elements_names(elements_names, &elts_names_length);
|
||||
else
|
||||
elts_names_length = get_formatted_elt_names_length(elements_names);
|
||||
|
||||
// Calculate the size needed
|
||||
header_size = obi_get_platform_header_size();
|
||||
@ -816,11 +1040,11 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
header->version = version_number;
|
||||
header->cloned_from = -1;
|
||||
|
||||
obi_column_set_elements_names(new_column, elements_names);
|
||||
set_elements_names(new_column, elements_names, elts_names_length);
|
||||
|
||||
// Free the element names if they were built
|
||||
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0))
|
||||
free(elements_names);
|
||||
if (built_elements_names != NULL)
|
||||
free(built_elements_names);
|
||||
|
||||
strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME);
|
||||
|
||||
@ -886,6 +1110,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
}
|
||||
|
||||
|
||||
|
||||
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
const char* column_name,
|
||||
obiversion_t version_number)
|
||||
@ -1043,6 +1268,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
}
|
||||
|
||||
|
||||
|
||||
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
||||
OBIDMS_column_p line_selection,
|
||||
const char* column_name,
|
||||
@ -1083,7 +1309,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
||||
(column_to_clone->header)->indexer_name,
|
||||
((column_to_clone->header)->associated_column).column_name,
|
||||
((column_to_clone->header)->associated_column).version,
|
||||
(column_to_clone->header)->comments
|
||||
(column_to_clone->header)->comments,
|
||||
true
|
||||
);
|
||||
|
||||
if (new_column == NULL)
|
||||
@ -1097,6 +1324,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
(new_column->header)->cloned_from = (column_to_clone->header)->version;
|
||||
|
||||
if (clone_data && (line_selection == NULL))
|
||||
@ -1137,6 +1366,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
||||
}
|
||||
|
||||
|
||||
|
||||
int obi_close_column(OBIDMS_column_p column)
|
||||
{
|
||||
int ret_val = 0;
|
||||
@ -1185,6 +1415,7 @@ int obi_close_column(OBIDMS_column_p column)
|
||||
}
|
||||
|
||||
|
||||
|
||||
int obi_clone_column_indexer(OBIDMS_column_p column)
|
||||
{
|
||||
char* new_indexer_name;
|
||||
@ -1208,6 +1439,7 @@ int obi_clone_column_indexer(OBIDMS_column_p column)
|
||||
}
|
||||
|
||||
|
||||
|
||||
int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap/remap?
|
||||
{
|
||||
size_t file_size;
|
||||
@ -1309,6 +1541,7 @@ int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap
|
||||
}
|
||||
|
||||
|
||||
|
||||
int obi_enlarge_column(OBIDMS_column_p column)
|
||||
{
|
||||
size_t file_size;
|
||||
@ -1363,7 +1596,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
||||
header_size = (column->header)->header_size;
|
||||
file_size = header_size + new_data_size;
|
||||
|
||||
// Enlarge the file // TODO isn't it possible that this makes the file "move"?
|
||||
// Enlarge the file
|
||||
if (ftruncate(column_file_descriptor, file_size) < 0)
|
||||
{
|
||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||
@ -1414,6 +1647,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
||||
}
|
||||
|
||||
|
||||
|
||||
void obi_ini_to_NA_values(OBIDMS_column_p column,
|
||||
index_t first_line_nb,
|
||||
index_t nb_lines)
|
||||
@ -1479,6 +1713,7 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
|
||||
}
|
||||
|
||||
|
||||
|
||||
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number)
|
||||
{
|
||||
OBIDMS_column_header_p header;
|
||||
@ -1562,6 +1797,7 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
|
||||
}
|
||||
|
||||
|
||||
|
||||
int obi_close_header(OBIDMS_column_header_p header)
|
||||
{
|
||||
if (munmap(header, header->header_size) < 0)
|
||||
@ -1574,47 +1810,56 @@ int obi_close_header(OBIDMS_column_header_p header)
|
||||
}
|
||||
|
||||
|
||||
// TODO to be rewritten in an optimized and safe way if possible
|
||||
|
||||
/*
 * Returns the index of an element in a line of a column from its name.
 *
 * The name is looked up by binary search in the sorted index of the column
 * header (sorted_elements_idx); the value found there is the position of the
 * name in elements_names_idx, which is returned as the element index.
 *
 * Returns OBIIdx_NA, after setting the error number, if the name is not
 * found.
 */
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
{
	int* elt_names_idx;

	elt_names_idx = bsearch_user_data(element_name, (column->header)->sorted_elements_idx, (column->header)->nb_elements_per_line, sizeof(int), column->header, cmp_elements_names_with_name_and_idx);

	if (elt_names_idx != NULL)
		return (index_t)(*elt_names_idx);

	obi_set_errno(OBICOL_UNKNOWN_ERROR);
	obidebug(1, "\nError: could not find element name %s", element_name);
	return OBIIdx_NA;
}
|
||||
|
||||
|
||||
// TODO doc, returns elements names with ; as separator (discuss maybe char**)
|
||||
/*
 * Builds a newly allocated character string containing the elements names of
 * a column in their original order, with ';' as separator and no terminal
 * ';' (e.g. "0;1;2").
 * The returned pointer has to be freed by the caller. NULL is returned if the
 * memory allocation fails.
 */
char* obi_get_elements_names(OBIDMS_column_p column)
{
	char*       names;
	char*       write_pos;
	const char* elt_name;
	size_t      name_length;
	int         elt;

	names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
	if (names == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for elements names");
		return NULL;
	}

	// Append each name followed by a ';'
	write_pos = names;
	for (elt = 0; elt < (column->header)->nb_elements_per_line; elt++)
	{
		elt_name    = ((column->header)->elements_names) + ((column->header)->elements_names_idx)[elt];
		name_length = strlen(elt_name);
		memcpy(write_pos, elt_name, name_length*sizeof(char));
		write_pos += name_length;
		*write_pos = ';';
		write_pos++;
	}

	// Replace the last ';' with the terminal character
	*(write_pos - 1) = '\0';

	return names;
}
|
||||
|
||||
|
||||
|
||||
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
|
||||
{
|
||||
// Check if the column is read-only
|
||||
@ -1649,6 +1894,7 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
|
||||
}
|
||||
|
||||
|
||||
|
||||
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
|
||||
{
|
||||
if ((line_nb+1) > ((column->header)->line_count))
|
||||
|
@ -31,7 +31,7 @@
|
||||
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
|
||||
*/
|
||||
#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
|
||||
* are used ("0;1;2;...;n"), considering ELEMENTS_NAMES_MAX.
|
||||
* are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX.
|
||||
*/
|
||||
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
|
||||
*/
|
||||
@ -39,6 +39,10 @@
|
||||
*/
|
||||
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
|
||||
*/
|
||||
#define FORMATTED_ELT_NAMES_SEPARATOR '\0' /**< The separator between the elements names once they are formatted by format_elements_names().
|
||||
*/
|
||||
#define NOT_FORMATTED_ELT_NAMES_SEPARATOR ';' /**< The separator between the elements names when they are not formatted (as given by the user).
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@ -66,9 +70,15 @@ typedef struct OBIDMS_column_header {
|
||||
*/
|
||||
index_t nb_elements_per_line; /**< Number of elements per line.
|
||||
*/
|
||||
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with ';' as separator
|
||||
* (no terminal ';').
|
||||
* (default are the indices: "0;1;2;...;n").
|
||||
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with '\0' as separator
|
||||
* and '\0\0' as terminal flag.
|
||||
* (default are the indices: "0\01\02\0...\0n\0\0").
|
||||
*/
|
||||
int elements_names_length; /**< Length of the character array where the elements names are stored.
|
||||
*/
|
||||
int elements_names_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the start of each element name in elements_names.
|
||||
*/
|
||||
int sorted_elements_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the sorted element names in elements_names_idx.
|
||||
*/
|
||||
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
|
||||
* element from the column.
|
||||
@ -184,12 +194,13 @@ size_t obi_get_platform_header_size();
|
||||
* @param nb_lines The number of lines to be stored.
|
||||
* @param nb_elements_per_line The number of elements per line. // TODO talk about default values
|
||||
* @param elements_names The names of the elements with ';' as separator (no terminal ';'),
|
||||
* NULL or "" if the default names are to be used ("0;1;2;...;n").
|
||||
* NULL or "" if the default names are to be used ("0\01\02\0...\0n").
|
||||
* @param indexer_name The name of the indexer if there is one associated with the column.
|
||||
* If NULL or "", the indexer name is set as the column name.
|
||||
* @param associated_column_name The name of the associated column if there is one.
|
||||
* @param associated_column_version The version of the associated column if there is one.
|
||||
* @param comments Optional comments associated with the column.
|
||||
* @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()).
|
||||
*
|
||||
* @returns A pointer on the newly created column structure.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -206,7 +217,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
const char* indexer_name,
|
||||
const char* associated_column_name,
|
||||
obiversion_t associated_column_version,
|
||||
const char* comments
|
||||
const char* comments,
|
||||
bool elt_names_formatted
|
||||
);
|
||||
|
||||
|
||||
@ -353,7 +365,7 @@ int obi_close_header(OBIDMS_column_header_p header);
|
||||
* @param element_name The name of the element.
|
||||
*
|
||||
* @returns The index of the element in a line of the column.
|
||||
* @retval OBIIdx_NA if an error occurred. // TODO not sure if this is "clean".
|
||||
* @retval OBIIdx_NA if an error occurred.
|
||||
*
|
||||
* @since July 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
@ -361,6 +373,22 @@ int obi_close_header(OBIDMS_column_header_p header);
|
||||
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers the elements names of the lines of a column, with ';' as separator (i.e. "0;1;2;...;n\0").
|
||||
*
|
||||
* @warning The returned pointer has to be freed by the caller.
|
||||
*
|
||||
* @param column A pointer on an OBIDMS column.
|
||||
*
|
||||
* @returns A pointer on a character array where the elements names are stored.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_get_elements_names(OBIDMS_column_p column);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Prepares a column to set a value.
|
||||
*
|
||||
|
527
src/obiview.c
527
src/obiview.c
@ -47,7 +47,7 @@
|
||||
|
||||
|
||||
/**
|
||||
* Internal function building the file name where the informations about an obiview are stored.
|
||||
* Internal function building the file name where the informations about a finished, read-only obiview are stored.
|
||||
*
|
||||
* @warning The returned pointer has to be freed by the caller.
|
||||
*
|
||||
@ -63,7 +63,23 @@ static char* build_obiview_file_name(const char* view_name);
|
||||
|
||||
|
||||
/**
|
||||
* Internal function checking if a view with a given name already exists in a DMS.
|
||||
* Internal function building the file name where the informations about an unfinished, writable obiview are stored.
|
||||
*
|
||||
* @warning The returned pointer has to be freed by the caller.
|
||||
*
|
||||
* @param view_name The name of the view.
|
||||
*
|
||||
* @returns A pointer to the file name.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since February 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static char* build_unfinished_obiview_file_name(const char* view_name);
|
||||
|
||||
|
||||
/**
|
||||
* Internal function checking if a view (either finished or unfinished) with a given name already exists in a DMS.
|
||||
*
|
||||
* @param dms The DMS.
|
||||
* @param view_name The name of the view.
|
||||
@ -73,7 +89,7 @@ static char* build_obiview_file_name(const char* view_name);
|
||||
* @since September 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
bool view_exists(OBIDMS_p dms, const char* view_name);
|
||||
static bool view_exists(OBIDMS_p dms, const char* view_name);
|
||||
|
||||
|
||||
/**
|
||||
@ -84,7 +100,7 @@ bool view_exists(OBIDMS_p dms, const char* view_name);
|
||||
* @since June 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
size_t get_platform_view_file_size();
|
||||
static size_t get_platform_view_file_size();
|
||||
|
||||
|
||||
/**
|
||||
@ -99,7 +115,7 @@ size_t get_platform_view_file_size();
|
||||
* @since August 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int enlarge_view_file(Obiview_p view, size_t new_size);
|
||||
static int enlarge_view_file(Obiview_p view, size_t new_size);
|
||||
|
||||
|
||||
/**
|
||||
@ -117,7 +133,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size);
|
||||
* @since August 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int write_comments_to_view_file(Obiview_p view, const char* comments);
|
||||
static int write_comments_to_view_file(Obiview_p view, const char* comments);
|
||||
|
||||
|
||||
/**
|
||||
@ -134,7 +150,7 @@ int write_comments_to_view_file(Obiview_p view, const char* comments);
|
||||
* @since June 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int create_obiview_file(OBIDMS_p dms, const char* view_name);
|
||||
static int create_obiview_file(OBIDMS_p dms, const char* view_name);
|
||||
|
||||
|
||||
/**
|
||||
@ -156,7 +172,7 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name);
|
||||
* @since June 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void update_column_refs(Obiview_p view);
|
||||
static void update_column_refs(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -175,7 +191,7 @@ void update_column_refs(Obiview_p view);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int create_column_dict(Obiview_p view);
|
||||
static int create_column_dict(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -194,7 +210,7 @@ int create_column_dict(Obiview_p view);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int update_column_dict(Obiview_p view);
|
||||
static int update_column_dict(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -219,7 +235,7 @@ int update_column_dict(Obiview_p view);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int update_column_refs_and_dict(Obiview_p view);
|
||||
static int update_column_refs_and_dict(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -239,7 +255,7 @@ int update_column_refs_and_dict(Obiview_p view);
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int update_lines(Obiview_p view, index_t line_count);
|
||||
static int update_lines(Obiview_p view, index_t line_count);
|
||||
|
||||
|
||||
/**
|
||||
@ -257,7 +273,71 @@ int update_lines(Obiview_p view, index_t line_count);
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Saves a view, updating its informations in the view file.
|
||||
*
|
||||
* @warning The view must be writable.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int save_view(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Rename a view file once the view is finished, replacing the '*.obiview_unfinished' extension with '*.obiview'.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since February 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int rename_finished_view(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Finishes a view: check the predicates, save all the informations, rename the view file.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since February 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int finish_view(Obiview_p view);
|
||||
|
||||
/**
|
||||
* @brief Closes an opened view.
|
||||
*
|
||||
* @warning Doesn't save the view.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @see obi_save_and_close_view()
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int close_view(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -276,7 +356,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p);
|
||||
static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p);
|
||||
|
||||
|
||||
/**
|
||||
@ -294,7 +374,7 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
|
||||
static int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
|
||||
|
||||
|
||||
/****** PREDICATE FUNCTIONS *******/
|
||||
@ -313,7 +393,7 @@ int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* view_has_nuc_sequence_column(Obiview_p view);
|
||||
static char* view_has_nuc_sequence_column(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -330,7 +410,7 @@ char* view_has_nuc_sequence_column(Obiview_p view);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* view_has_quality_column(Obiview_p view);
|
||||
static char* view_has_quality_column(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -347,7 +427,7 @@ char* view_has_quality_column(Obiview_p view);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* view_has_id_column(Obiview_p view);
|
||||
static char* view_has_id_column(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -364,7 +444,7 @@ char* view_has_id_column(Obiview_p view);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* view_has_definition_column(Obiview_p view);
|
||||
static char* view_has_definition_column(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -381,7 +461,7 @@ char* view_has_definition_column(Obiview_p view);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* view_check_qual_match_seqs(Obiview_p view);
|
||||
static char* view_check_qual_match_seqs(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
@ -396,7 +476,7 @@ char* view_check_qual_match_seqs(Obiview_p view);
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view));
|
||||
static char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view));
|
||||
|
||||
|
||||
/**
|
||||
@ -410,7 +490,7 @@ char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obivi
|
||||
* @since July 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* view_check_all_predicates(Obiview_p view);
|
||||
static char* view_check_all_predicates(Obiview_p view);
|
||||
|
||||
|
||||
/************************************************************************
|
||||
@ -443,11 +523,35 @@ static char* build_obiview_file_name(const char* view_name)
|
||||
}
|
||||
|
||||
|
||||
bool view_exists(OBIDMS_p dms, const char* view_name)
|
||||
static char* build_unfinished_obiview_file_name(const char* view_name)
|
||||
{
|
||||
char* file_name;
|
||||
|
||||
// Build file name
|
||||
file_name = (char*) malloc((strlen(view_name) + 19 + 1)*sizeof(char));
|
||||
if (file_name == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError allocating memory for a view file name");
|
||||
return NULL;
|
||||
}
|
||||
if (sprintf(file_name, "%s.obiview_unfinished", view_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nProblem building an unfinished obiview file name");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return file_name;
|
||||
}
|
||||
|
||||
|
||||
static bool view_exists(OBIDMS_p dms, const char* view_name)
|
||||
{
|
||||
struct dirent* dp;
|
||||
char* file_name;
|
||||
|
||||
// Check finished views
|
||||
// Create file name
|
||||
file_name = build_obiview_file_name(view_name);
|
||||
if (file_name == NULL)
|
||||
@ -458,13 +562,38 @@ bool view_exists(OBIDMS_p dms, const char* view_name)
|
||||
if ((dp->d_name)[0] == '.')
|
||||
continue;
|
||||
if (strcmp(dp->d_name, file_name) == 0)
|
||||
{
|
||||
free(file_name);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
free(file_name);
|
||||
|
||||
// Check unfinished views
|
||||
// Create file name
|
||||
file_name = build_unfinished_obiview_file_name(view_name);
|
||||
if (file_name == NULL)
|
||||
return -1;
|
||||
|
||||
while ((dp = readdir(dms->view_directory)) != NULL)
|
||||
{
|
||||
if ((dp->d_name)[0] == '.')
|
||||
continue;
|
||||
if (strcmp(dp->d_name, file_name) == 0)
|
||||
{
|
||||
free(file_name);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
free(file_name);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
size_t get_platform_view_file_size()
|
||||
static size_t get_platform_view_file_size()
|
||||
{
|
||||
size_t obiview_size;
|
||||
size_t rounded_obiview_size;
|
||||
@ -480,7 +609,7 @@ size_t get_platform_view_file_size()
|
||||
}
|
||||
|
||||
|
||||
int enlarge_view_file(Obiview_p view, size_t new_size)
|
||||
static int enlarge_view_file(Obiview_p view, size_t new_size)
|
||||
{
|
||||
int obiview_file_descriptor;
|
||||
double multiple;
|
||||
@ -488,7 +617,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size)
|
||||
char* file_name;
|
||||
|
||||
// Create file name
|
||||
file_name = build_obiview_file_name((view->infos)->name);
|
||||
file_name = build_unfinished_obiview_file_name((view->infos)->name);
|
||||
if (file_name == NULL)
|
||||
return -1;
|
||||
|
||||
@ -556,7 +685,7 @@ int enlarge_view_file(Obiview_p view, size_t new_size)
|
||||
}
|
||||
|
||||
|
||||
int write_comments_to_view_file(Obiview_p view, const char* comments)
|
||||
static int write_comments_to_view_file(Obiview_p view, const char* comments)
|
||||
{
|
||||
size_t new_size;
|
||||
|
||||
@ -580,14 +709,14 @@ int write_comments_to_view_file(Obiview_p view, const char* comments)
|
||||
}
|
||||
|
||||
|
||||
int create_obiview_file(OBIDMS_p dms, const char* view_name)
|
||||
static int create_obiview_file(OBIDMS_p dms, const char* view_name)
|
||||
{
|
||||
char* file_name;
|
||||
int obiview_file_descriptor;
|
||||
size_t file_size;
|
||||
|
||||
// Create file name
|
||||
file_name = build_obiview_file_name(view_name);
|
||||
file_name = build_unfinished_obiview_file_name(view_name);
|
||||
if (file_name == NULL)
|
||||
return -1;
|
||||
|
||||
@ -634,7 +763,7 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name)
|
||||
}
|
||||
|
||||
|
||||
void update_column_refs(Obiview_p view)
|
||||
static void update_column_refs(Obiview_p view)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -646,7 +775,7 @@ void update_column_refs(Obiview_p view)
|
||||
}
|
||||
|
||||
|
||||
int create_column_dict(Obiview_p view)
|
||||
static int create_column_dict(Obiview_p view)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -681,7 +810,7 @@ int create_column_dict(Obiview_p view)
|
||||
}
|
||||
|
||||
|
||||
int update_column_dict(Obiview_p view)
|
||||
static int update_column_dict(Obiview_p view)
|
||||
{
|
||||
// Re-initialize the dictionary to rebuild it from scratch
|
||||
ht_free(view->column_dict);
|
||||
@ -693,14 +822,14 @@ int update_column_dict(Obiview_p view)
|
||||
}
|
||||
|
||||
|
||||
int update_column_refs_and_dict(Obiview_p view)
|
||||
static int update_column_refs_and_dict(Obiview_p view)
|
||||
{
|
||||
update_column_refs(view);
|
||||
return update_column_dict(view);
|
||||
}
|
||||
|
||||
|
||||
int update_lines(Obiview_p view, index_t line_count)
|
||||
static int update_lines(Obiview_p view, index_t line_count)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -739,7 +868,7 @@ int update_lines(Obiview_p view, index_t line_count)
|
||||
}
|
||||
|
||||
|
||||
OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
||||
{
|
||||
int i;
|
||||
OBIDMS_column_p column = NULL;
|
||||
@ -799,7 +928,161 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
||||
}
|
||||
|
||||
|
||||
int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
|
||||
/*
 * Stores in the view infos the reference to the current line selection (or the
 * absence of one), then refreshes the column references of the view.
 *
 * Returns 0 on success, -1 if the view is read-only.
 */
static int save_view(Obiview_p view)
{
	Obiview_infos_p infos;

	// A read-only view can not be saved
	if (view->read_only)
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError trying to save a read-only view");
		return -1;
	}

	infos = view->infos;

	if (view->line_selection == NULL)
	{
		// Necessary because the line selection could have been deleted if a column was cloned
		((infos->line_selection).column_name)[0] = '\0';
		(infos->line_selection).version = -1;
		infos->all_lines = true;
	}
	else
	{
		// Unnecessary in theory, the line selection references are already saved
		strcpy((infos->line_selection).column_name, ((view->line_selection)->header)->name);
		(infos->line_selection).version = ((view->line_selection)->header)->version;
		infos->all_lines = false;
	}

	update_column_refs(view);

	return 0;
}
|
||||
|
||||
|
||||
/*
 * Renames the file of a view from '<name>.obiview_unfinished' to
 * '<name>.obiview' inside the VIEWS directory of the DMS.
 *
 * Every intermediate buffer is released on all paths (success and failure).
 *
 * Returns 0 on success, -1 if a name could not be built, a path did not fit
 * in MAX_PATH_LEN, an allocation failed, or rename() failed.
 */
static int rename_finished_view(Obiview_p view)
{
	char* old_name           = NULL;
	char* new_name           = NULL;
	char* path_old_name      = NULL;
	char* path_new_name      = NULL;
	char* full_path_old_name = NULL;
	char* full_path_new_name = NULL;
	int   written;
	int   ret_value          = -1;

	// Build the unfinished and finished file names
	old_name = build_unfinished_obiview_file_name((view->infos)->name);
	new_name = build_obiview_file_name((view->infos)->name);
	if ((old_name == NULL) || (new_name == NULL))
	{
		obidebug(1, "\nError building the file names when renaming a finished view");
		goto cleanup;
	}

	// Build the paths relative to the DMS, refusing to overflow the buffers
	path_old_name = (char*) malloc(MAX_PATH_LEN);
	path_new_name = (char*) malloc(MAX_PATH_LEN);
	if ((path_old_name == NULL) || (path_new_name == NULL))
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for a view file path");
		goto cleanup;
	}

	written = snprintf(path_old_name, MAX_PATH_LEN, "VIEWS/%s", old_name);
	if ((written < 0) || ((size_t) written >= (size_t) MAX_PATH_LEN))
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError building the path of an unfinished view file: path too long");
		goto cleanup;
	}

	written = snprintf(path_new_name, MAX_PATH_LEN, "VIEWS/%s", new_name);
	if ((written < 0) || ((size_t) written >= (size_t) MAX_PATH_LEN))
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError building the path of a finished view file: path too long");
		goto cleanup;
	}

	// Resolve the full paths
	full_path_old_name = obi_dms_get_full_path(view->dms, path_old_name);
	full_path_new_name = obi_dms_get_full_path(view->dms, path_new_name);
	if ((full_path_old_name == NULL) || (full_path_new_name == NULL))
	{
		obidebug(1, "\nError getting the full path of a view file when renaming a finished view");
		goto cleanup;
	}

	if (rename(full_path_old_name, full_path_new_name) < 0)
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError renaming the file of a finished view: %s", full_path_new_name);
		goto cleanup;
	}

	ret_value = 0;

cleanup:
	// free(NULL) is a no-op, so this is safe whatever step failed
	free(old_name);
	free(new_name);
	free(path_new_name);
	free(path_old_name);
	free(full_path_old_name);
	free(full_path_new_name);

	return ret_value;
}
|
||||
|
||||
|
||||
/*
 * Finishes a writable view: checks the predicates, writes them to the view
 * comments, saves the view, renames its file and flags it as finished.
 *
 * Returns 0 on success, -1 if the view is read-only or if any step failed.
 */
static int finish_view(Obiview_p view)
{
	char* predicates;

	// Check that the view is not read-only
	if (view->read_only)
	{
		obi_set_errno(OBIVIEW_ERROR);
		obidebug(1, "\nError trying to save a read-only view");
		return -1;
	}

	// Check predicates
	predicates = view_check_all_predicates(view);
	if (predicates == NULL)
	{
		obidebug(1, "\nView predicates not respected");
		return -1;	// TODO reverse view (delete files)
	}

	// Write the predicates to the comments; a failure here must not be silently ignored
	if (write_comments_to_view_file(view, predicates) < 0)
	{
		obidebug(1, "\nError writing the predicates to the comments when finishing a view");
		free(predicates);
		return -1;
	}
	free(predicates);

	if (save_view(view) < 0)
		return -1;

	if (rename_finished_view(view) < 0)
		return -1;

	// Flag the view as finished
	(view->infos)->finished = true;

	return 0;
}
|
||||
|
||||
|
||||
/*
 * Closes an opened view without saving it: closes every column and the line
 * selection if there is one, frees the column dictionary, unmaps the view
 * file and frees the view structure.
 *
 * All the steps are attempted even if one fails.
 * Returns 0 if everything succeeded, -1 if at least one step failed.
 */
static int close_view(Obiview_p view)
{
	int status = 0;
	int col    = 0;

	// Close all the columns of the view
	while (col < ((view->infos)->column_count))
	{
		if (obi_close_column((view->columns)[col]) < 0)
		{
			obidebug(1, "\nError closing a column while closing a view");
			status = -1;
		}
		col++;
	}

	// Close line selection if there is one
	if ((view->line_selection != NULL) && (obi_close_column(view->line_selection) < 0))
	{
		obidebug(1, "\nError closing a line selection while closing a view");
		status = -1;
	}

	// Free the column dictionary
	ht_free(view->column_dict);

	// Unmap view file
	if (obi_view_unmap_file(view->dms, view->infos) < 0)
	{
		obidebug(1, "\nError unmaping a view file while closing a view");
		status = -1;
	}

	free(view);

	return status;
}
|
||||
|
||||
|
||||
static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
|
||||
{
|
||||
int i;
|
||||
char* column_name = NULL;
|
||||
@ -846,7 +1129,7 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
|
||||
}
|
||||
|
||||
|
||||
int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
|
||||
static int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
|
||||
{
|
||||
if (((*line_nb_p)+1) > ((view->infos)->line_count))
|
||||
{
|
||||
@ -865,7 +1148,7 @@ int prepare_to_get_value_from_column(Obiview_p view, index_t* line_nb_p)
|
||||
|
||||
/****** PREDICATE FUNCTIONS *******/
|
||||
|
||||
char* view_has_nuc_sequence_column(Obiview_p view)
|
||||
static char* view_has_nuc_sequence_column(Obiview_p view)
|
||||
{
|
||||
char* predicate;
|
||||
|
||||
@ -889,7 +1172,7 @@ char* view_has_nuc_sequence_column(Obiview_p view)
|
||||
}
|
||||
|
||||
|
||||
char* view_has_quality_column(Obiview_p view)
|
||||
static char* view_has_quality_column(Obiview_p view)
|
||||
{
|
||||
char* predicate;
|
||||
|
||||
@ -913,7 +1196,7 @@ char* view_has_quality_column(Obiview_p view)
|
||||
}
|
||||
|
||||
|
||||
char* view_has_id_column(Obiview_p view)
|
||||
static char* view_has_id_column(Obiview_p view)
|
||||
{
|
||||
char* predicate;
|
||||
|
||||
@ -936,7 +1219,8 @@ char* view_has_id_column(Obiview_p view)
|
||||
}
|
||||
}
|
||||
|
||||
char* view_has_definition_column(Obiview_p view)
|
||||
|
||||
static char* view_has_definition_column(Obiview_p view)
|
||||
{
|
||||
char* predicate;
|
||||
|
||||
@ -960,7 +1244,7 @@ char* view_has_definition_column(Obiview_p view)
|
||||
}
|
||||
|
||||
|
||||
char* view_check_qual_match_seqs(Obiview_p view)
|
||||
static char* view_check_qual_match_seqs(Obiview_p view)
|
||||
{
|
||||
index_t i, j, k;
|
||||
index_t nb_elements_per_line;
|
||||
@ -1053,13 +1337,13 @@ char* view_check_qual_match_seqs(Obiview_p view)
|
||||
}
|
||||
|
||||
|
||||
char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view))
|
||||
static char* view_check_one_predicate(Obiview_p view, char* (*predicate_function)(Obiview_p view))
|
||||
{
|
||||
return predicate_function(view);
|
||||
}
|
||||
|
||||
|
||||
char* view_check_all_predicates(Obiview_p view)
|
||||
static char* view_check_all_predicates(Obiview_p view)
|
||||
{
|
||||
int i, j;
|
||||
size_t size_to_allocate;
|
||||
@ -1195,7 +1479,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
||||
}
|
||||
|
||||
// Map view file
|
||||
view->infos = obi_view_map_file(dms, view_name);
|
||||
view->infos = obi_view_map_file(dms, view_name, false);
|
||||
if (view->infos == NULL)
|
||||
{
|
||||
obidebug(1, "\nError mapping the informations of a new view");
|
||||
@ -1236,7 +1520,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
||||
// If there is a new line selection, build it by combining it with the one from the view to clone if there is one
|
||||
else if (line_selection != NULL)
|
||||
{
|
||||
view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL);
|
||||
view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL, false);
|
||||
if ((view->line_selection) == NULL)
|
||||
{
|
||||
obidebug(1, "\nError creating a column corresponding to a line selection");
|
||||
@ -1305,7 +1589,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
||||
if (write_comments_to_view_file(view, clone_comment) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing comments when creating a view");
|
||||
obi_close_view(view);
|
||||
close_view(view);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@ -1341,7 +1625,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
||||
if (write_comments_to_view_file(view, comments) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing comments when creating a view");
|
||||
obi_close_view(view);
|
||||
close_view(view);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1360,7 +1644,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
|
||||
// Create the column dictionary (hash table) associating column names (or aliases) to column pointers
|
||||
if (create_column_dict(view) < 0)
|
||||
{
|
||||
obi_close_view(view);
|
||||
close_view(view);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1409,7 +1693,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
|
||||
return NULL;
|
||||
view = obi_new_view(dms, view_name, view_to_clone, line_selection, comments);
|
||||
|
||||
obi_close_view(view_to_clone);
|
||||
close_view(view_to_clone);
|
||||
|
||||
return view;
|
||||
}
|
||||
@ -1511,26 +1795,43 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
|
||||
return NULL;
|
||||
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column);
|
||||
|
||||
obi_close_view(view_to_clone);
|
||||
close_view(view_to_clone);
|
||||
|
||||
return view;
|
||||
}
|
||||
|
||||
|
||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
|
||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bool finished)
|
||||
{
|
||||
char* file_name;
|
||||
Obiview_infos_p view_infos;
|
||||
int obiview_file_descriptor;
|
||||
size_t file_size;
|
||||
int open_flag;
|
||||
int mmap_flag;
|
||||
|
||||
// Create file name
|
||||
if (finished)
|
||||
file_name = build_obiview_file_name(view_name);
|
||||
else
|
||||
file_name = build_unfinished_obiview_file_name(view_name);
|
||||
if (file_name == NULL)
|
||||
return NULL;
|
||||
|
||||
// Set flags (read-only or not)
|
||||
if (finished)
|
||||
{
|
||||
open_flag = O_RDONLY;
|
||||
mmap_flag = PROT_READ;
|
||||
}
|
||||
else
|
||||
{
|
||||
open_flag = O_RDWR;
|
||||
mmap_flag = PROT_READ | PROT_WRITE;
|
||||
}
|
||||
|
||||
// Open view file
|
||||
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDWR, 0777);
|
||||
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, open_flag, 0777);
|
||||
if (obiview_file_descriptor < 0)
|
||||
{
|
||||
if (errno == ENOENT)
|
||||
@ -1560,7 +1861,7 @@ Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
|
||||
// Map the view infos structure
|
||||
view_infos = mmap(NULL,
|
||||
file_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
mmap_flag,
|
||||
MAP_SHARED,
|
||||
obiview_file_descriptor,
|
||||
0
|
||||
@ -1590,12 +1891,15 @@ int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
|
||||
size_t file_size;
|
||||
|
||||
// Get file name
|
||||
if (view_infos->finished)
|
||||
file_name = build_obiview_file_name(view_infos->name);
|
||||
else
|
||||
file_name = build_unfinished_obiview_file_name(view_infos->name);
|
||||
if (file_name == NULL)
|
||||
return -1;
|
||||
|
||||
// Open view file
|
||||
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDWR, 0777);
|
||||
obiview_file_descriptor = openat(dms->view_dir_fd, file_name, O_RDONLY, 0777);
|
||||
if (obiview_file_descriptor < 0)
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
@ -1661,13 +1965,9 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
||||
}
|
||||
|
||||
// Map view file
|
||||
view->infos = obi_view_map_file(dms, view_name);
|
||||
|
||||
// Check that the view is finished and can be opened
|
||||
if ((view->infos)->finished == false)
|
||||
view->infos = obi_view_map_file(dms, view_name, true);
|
||||
if ((view->infos) == NULL)
|
||||
{
|
||||
obidebug(1, "\nError opening a view: the view is not finished");
|
||||
obi_view_unmap_file(view->dms, view->infos);
|
||||
free(view);
|
||||
return NULL;
|
||||
}
|
||||
@ -1697,7 +1997,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
||||
if (column_pointer == NULL)
|
||||
{
|
||||
obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, column_name, column_version);
|
||||
obi_close_view(view);
|
||||
close_view(view);
|
||||
return NULL;
|
||||
}
|
||||
(view->columns)[i] = column_pointer;
|
||||
@ -1713,7 +2013,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
||||
if (create_column_dict(view) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the column dictionary when opening a view");
|
||||
obi_close_view(view);
|
||||
close_view(view);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1792,7 +2092,7 @@ int obi_view_add_column(Obiview_p view,
|
||||
// Open or create the column
|
||||
if (create)
|
||||
{ // Create column
|
||||
column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, indexer_name, associated_column_name, associated_column_version, comments);
|
||||
column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, indexer_name, associated_column_name, associated_column_version, comments, false);
|
||||
if (column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError creating a column to add to a view");
|
||||
@ -1963,103 +2263,16 @@ int obi_view_create_column_alias(Obiview_p view, const char* current_name, const
|
||||
}
|
||||
|
||||
|
||||
int obi_save_view(Obiview_p view)
|
||||
{
|
||||
// Check that the view is not read-only
|
||||
if (view->read_only)
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nError trying to save a read-only view");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Store reference for the line selection associated with that view if there is one
|
||||
if (view->line_selection != NULL) // Unnecessary in theory, the line selection references are already saved
|
||||
{
|
||||
strcpy(((view->infos)->line_selection).column_name, ((view->line_selection)->header)->name);
|
||||
((view->infos)->line_selection).version = ((view->line_selection)->header)->version;
|
||||
(view->infos)->all_lines = false;
|
||||
}
|
||||
else // Necessary because line selection could have been deleted if a column was cloned
|
||||
{
|
||||
(((view->infos)->line_selection).column_name)[0] = '\0';
|
||||
((view->infos)->line_selection).version = -1;
|
||||
(view->infos)->all_lines = true;
|
||||
}
|
||||
|
||||
update_column_refs(view);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int obi_close_view(Obiview_p view)
|
||||
{
|
||||
int i;
|
||||
int ret_value;
|
||||
|
||||
ret_value = 0;
|
||||
|
||||
for (i=0; i < ((view->infos)->column_count); i++)
|
||||
{
|
||||
if (obi_close_column((view->columns)[i]) < 0)
|
||||
{
|
||||
obidebug(1, "\nError closing a column while closing a view");
|
||||
ret_value = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Close line selection if there is one
|
||||
if (view->line_selection != NULL)
|
||||
{
|
||||
if (obi_close_column(view->line_selection) < 0)
|
||||
{
|
||||
obidebug(1, "\nError closing a line selection while closing a view");
|
||||
ret_value = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Flag the view as finished
|
||||
(view->infos)->finished = true;
|
||||
|
||||
// Free the column dictionary
|
||||
ht_free(view->column_dict);
|
||||
|
||||
// Unmap view file
|
||||
if (obi_view_unmap_file(view->dms, view->infos) < 0)
|
||||
{
|
||||
obidebug(1, "\nError unmaping a view file while closing a view");
|
||||
ret_value = -1;
|
||||
}
|
||||
|
||||
free(view);
|
||||
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
|
||||
int obi_save_and_close_view(Obiview_p view)
|
||||
{
|
||||
char* predicates;
|
||||
// Finish and save the view if it is not read-only
|
||||
if ( ! (view->read_only))
|
||||
if (finish_view(view) < 0)
|
||||
return -1;
|
||||
|
||||
if (!(view->read_only))
|
||||
{
|
||||
predicates = view_check_all_predicates(view);
|
||||
if (predicates == NULL)
|
||||
{
|
||||
obidebug(1, "\nView predicates not respected");
|
||||
return -1; // TODO reverse view (delete files)
|
||||
}
|
||||
else
|
||||
{
|
||||
write_comments_to_view_file(view, predicates);
|
||||
free(predicates);
|
||||
}
|
||||
if (obi_save_view(view) < 0)
|
||||
return -1;
|
||||
}
|
||||
if (obi_close_view(view) < 0)
|
||||
if (close_view(view) < 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -242,6 +242,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
|
||||
*
|
||||
* @param dms A pointer on the OBIDMS.
|
||||
* @param view_name The unique name identifying the view.
|
||||
* @param finished Whether the view is finished or not.
|
||||
*
|
||||
* @returns A pointer on the mapped view infos structure.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -249,7 +250,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
|
||||
* @since June 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name);
|
||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name, bool finished);
|
||||
|
||||
|
||||
/**
|
||||
@ -444,42 +445,6 @@ int obi_select_line(Obiview_p view, index_t line_nb);
|
||||
int obi_select_lines(Obiview_p view, index_t* line_nbs);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Saves a view, writing it in the view file.
|
||||
*
|
||||
* The view is written at the end of the view file, following the latest written view.
|
||||
*
|
||||
* @warning The view must be writable.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_save_view(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Closes an opened view.
|
||||
*
|
||||
* @warning Uses obi_save_and_close_view() to automatically save the view if it's not already saved in the view file.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_close_view(Obiview_p view);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Closes an opened view, and saves it if it is not read-only (meaning it is not already saved in the view file).
|
||||
*
|
||||
|
@ -1,16 +1,22 @@
|
||||
/*
|
||||
* sse_banded_LCS_alignment.c
|
||||
*
|
||||
* Created on: 7 nov. 2012
|
||||
* Author: celine mercier
|
||||
/****************************************************************************
|
||||
* LCS alignment of two sequences *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file sse_banded_LCS_alignment.c
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date November 7th 2012
|
||||
* @brief Functions handling the alignment of two sequences to compute their Longest Common Subsequence.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "obierrno.h"
|
||||
#include "obidebug.h"
|
||||
@ -24,6 +30,231 @@
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function printing a 128 bits register as 8 16-bits integers.
|
||||
*
|
||||
* @param r The register to print.
|
||||
*
|
||||
* @author Eric Coissac (eric.coissac@metabarcoding.org)
|
||||
*/
|
||||
static void printreg(__m128i r);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function extracting a 16-bits integer from a 128 bits register.
|
||||
*
|
||||
* @param r The register to read.
|
||||
* @param p The position at which the integer should be read (between 0 and 7).
|
||||
*
|
||||
* @returns The extracted integer.
|
||||
*
|
||||
* @author Eric Coissac (eric.coissac@metabarcoding.org)
|
||||
*/
|
||||
static inline int extract_reg(__m128i r, int p);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function aligning two sequences, computing the lengths of their Longest Common Subsequence and of their alignment.
|
||||
*
|
||||
* @warning The first argument (seq1) must correspond to the longest sequence.
|
||||
*
|
||||
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||
* @param l1 The length of the first sequence.
|
||||
* @param l2 The length of the second sequence.
|
||||
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
|
||||
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
|
||||
* @param address A pointer, aligned on a 16 bytes boundary, on the int array where the initial values for the alignment length are stored,
|
||||
* as prepared for the alignment by initializeAddressWithGaps().
|
||||
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||
* @param ali_length A pointer on the int where the alignment length will be stored.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function aligning two sequences, computing the length of their Longest Common Subsequence (and not the alignment length).
|
||||
*
|
||||
* @warning The first argument (seq1) must correspond to the longest sequence.
|
||||
*
|
||||
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||
* @param l1 The length of the first sequence.
|
||||
* @param l2 The length of the second sequence.
|
||||
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
|
||||
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
|
||||
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function calculating the length of the left band for the banded alignment.
|
||||
*
|
||||
* @param lmax The length of the longest sequence to align.
|
||||
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
|
||||
*
|
||||
* @returns The length of the left band.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int calculateLeftBandLength(int lmax, int LCSmin);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function calculating the length of the right band for the banded alignment.
|
||||
*
|
||||
* @param lmin The length of the shortest sequence to align.
|
||||
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
|
||||
*
|
||||
* @returns The length of the right band.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int calculateRightBandLength(int lmin, int LCSmin);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function calculating the length of the complete band for the banded alignment.
|
||||
*
|
||||
* @param bandLengthRight The length of the right band for the banded alignment, as computed by calculateRightBandLength().
|
||||
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
|
||||
*
|
||||
* @returns The length of the complete band.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function calculating the size to allocate for the int array where the alignment length will be stored in the matrix.
|
||||
*
|
||||
* @param maxLen The length of the longest sequence to align.
|
||||
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
|
||||
*
|
||||
* @returns The size to allocate in bytes.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int calculateSizeToAllocate(int maxLen, int LCSmin);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function initializing the int array corresponding to a sequence to align with default values.
|
||||
*
|
||||
* @param seq The int array corresponding to the sequence to align, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||
* @param size The number of positions to initialize.
|
||||
* @param iniValue The value that the positions should be initialized to.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void iniSeq(int16_t* seq, int size, int16_t iniValue);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function building the int array corresponding to a sequence to align.
|
||||
*
|
||||
* Each nucleotide is stored as a short int (int16_t).
|
||||
*
|
||||
* @param seq A pointer on the allocated int array.
|
||||
* @param s A pointer on the character string corresponding to the sequence.
|
||||
* @param l The length of the sequence.
|
||||
* @param reverse A boolean indicating whether the sequence should be written reversed
|
||||
* (for the second sequence to align).
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void putSeqInSeq(int16_t* seq, char* s, int l, bool reverse);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function building the int array corresponding to an obiblob containing a sequence.
|
||||
*
|
||||
* Each nucleotide is stored as a short int (int16_t).
|
||||
*
|
||||
* @param seq A pointer on the allocated int array.
|
||||
* @param b A pointer on the obiblob containing the sequence.
|
||||
* @param l The length of the (decoded) sequence.
|
||||
* @param reverse A boolean indicating whether the sequence should be written reversed
|
||||
* (for the second sequence to align).
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void putBlobInSeq(int16_t* seq, Obi_blob_p b, int l, bool reverse);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function preparing an int array with the initial values for the alignment lengths before the alignment.
|
||||
*
|
||||
* The int array containing the initial alignment lengths (corresponding to the first line of the diagonalized band of the alignment matrix)
|
||||
* needs to be initialized with external gap lengths before the alignment.
|
||||
*
|
||||
* @param address A pointer, aligned on a 16 bytes boundary, on the int array where the initial values for the alignment length are to be stored.
|
||||
* @param bandLengthTotal The length of the complete band for the banded alignment, as computed by calculateSSEBandLength().
|
||||
* @param bandLengthLeft The length of the left band for the banded alignment, as computed by calculateLeftBandLength().
|
||||
* @param lmax The length of the longest sequence to align.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int lmax);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function aligning two sequences, computing the lengths of their Longest Common Subsequence and of their alignment.
|
||||
*
|
||||
* @warning The first argument (seq1) must correspond to the longest sequence.
|
||||
*
|
||||
* @param seq1 The first sequence, the longest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||
* @param seq2 The second sequence, the shortest of the two, as prepared by putSeqInSeq() or putBlobInSeq().
|
||||
* @param l1 The length of the first sequence.
|
||||
* @param l2 The length of the second sequence.
|
||||
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
||||
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||
* @param address A pointer, aligned on a 16 bytes boundary, on an allocated int array where the initial values for the alignment length will be stored.
|
||||
* @param LCSmin The minimum length of the LCS to be above the chosen threshold, as computed by calculateLCSmin().
|
||||
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||
* @param ali_length A pointer on the int where the alignment length will be stored.
|
||||
*
|
||||
* @returns The alignment score (normalized according to the parameters).
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length);
|
||||
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
*
|
||||
************************************************************************/
|
||||
|
||||
|
||||
static void printreg(__m128i r)
|
||||
{
|
||||
int16_t a0,a1,a2,a3,a4,a5,a6,a7;
|
||||
@ -61,7 +292,6 @@ static inline int extract_reg(__m128i r, int p)
|
||||
}
|
||||
|
||||
|
||||
// TODO warning on length order
|
||||
void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, int* lcs_length, int* ali_length)
|
||||
{
|
||||
register int j;
|
||||
@ -287,7 +517,6 @@ void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int
|
||||
}
|
||||
|
||||
|
||||
// TODO warning on length order
|
||||
void sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int* lcs_length)
|
||||
{
|
||||
register int j;
|
||||
@ -446,15 +675,14 @@ int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft)
|
||||
}
|
||||
|
||||
|
||||
// TODO that's gonna be fun to doc
|
||||
int calculateSizeToAllocate(int maxLen, int minLen, int LCSmin)
|
||||
int calculateSizeToAllocate(int maxLen, int LCSmin)
|
||||
{
|
||||
int size;
|
||||
|
||||
size = calculateLeftBandLength(maxLen, LCSmin);
|
||||
|
||||
size *= 2;
|
||||
size = (size & (~ (int)7)) + (( size & (int)7) ? 8:0); // Closest greater 8 multiple
|
||||
size = (size & (~ (int)7)) + ((size & (int)7) ? 8:0); // Closest greater 8 multiple
|
||||
size *= 3;
|
||||
size += 16;
|
||||
|
||||
@ -522,13 +750,13 @@ void putBlobInSeq(int16_t* seq, Obi_blob_p b, int l, bool reverse)
|
||||
}
|
||||
|
||||
|
||||
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int l1)
|
||||
void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int lmax)
|
||||
{
|
||||
int i;
|
||||
int address_00, x_address_10, address_01, address_01_shifted;
|
||||
int numberOfRegistersPerLine;
|
||||
int bm;
|
||||
int value=INT16_MAX-l1;
|
||||
int value=INT16_MAX-lmax;
|
||||
|
||||
numberOfRegistersPerLine = bandLengthTotal / 8;
|
||||
bm = bandLengthLeft%2;
|
||||
@ -556,7 +784,6 @@ void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLe
|
||||
}
|
||||
|
||||
|
||||
// TODO warning on length order
|
||||
double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool normalize, int reference, bool similarity_mode, int16_t* address, int LCSmin, int* lcs_length, int* ali_length)
|
||||
{
|
||||
double id;
|
||||
@ -610,10 +837,14 @@ double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, bool n
|
||||
|
||||
|
||||
|
||||
// PUBLIC FUNCTIONS
|
||||
/**********************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool similarity_mode)
|
||||
int calculateLCSmin(int lmax, int lmin, double threshold, bool normalize, int reference, bool similarity_mode)
|
||||
{
|
||||
int LCSmin;
|
||||
|
||||
@ -622,16 +853,16 @@ int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int refere
|
||||
if (normalize)
|
||||
{
|
||||
if (reference == MINLEN)
|
||||
LCSmin = threshold*l2;
|
||||
LCSmin = threshold*lmin;
|
||||
else // ref = maxlen or alilen
|
||||
LCSmin = threshold*l1;
|
||||
LCSmin = threshold*lmax;
|
||||
}
|
||||
else if (similarity_mode)
|
||||
LCSmin = threshold;
|
||||
else if (reference == MINLEN) // not similarity_mode
|
||||
LCSmin = l2 - threshold;
|
||||
LCSmin = lmin - threshold;
|
||||
else // not similarity_mode and ref = maxlen or alilen
|
||||
LCSmin = l1 - threshold;
|
||||
LCSmin = lmax - threshold;
|
||||
}
|
||||
else
|
||||
LCSmin = 0;
|
||||
@ -669,6 +900,14 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
|
||||
lmin = l1;
|
||||
}
|
||||
|
||||
// Check that the sequences are not greater than what can be aligned using the 16 bits registers (as the LCS and alignment lengths are kept on 16 bits)
|
||||
if (lmax > SHRT_MAX)
|
||||
{
|
||||
obi_set_errno(OBI_ALIGN_ERROR);
|
||||
obidebug(1, "\nError: can not align sequences longer than %d (as the LCS and alignment lengths are kept on 16 bits)", SHRT_MAX);
|
||||
return 0; // TODO DOUBLE_MIN to flag error
|
||||
}
|
||||
|
||||
// If the score is expressed as a normalized distance, get the corresponding identity
|
||||
if (!similarity_mode && normalize)
|
||||
threshold = 1.0 - threshold;
|
||||
@ -679,7 +918,7 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
|
||||
// Allocate space for matrix band if the alignment length must be computed
|
||||
if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
|
||||
{
|
||||
sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
|
||||
sizeToAllocateForBand = calculateSizeToAllocate(lmax, LCSmin);
|
||||
address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
|
||||
if (address == NULL)
|
||||
{
|
||||
@ -764,6 +1003,14 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
|
||||
lmin = l1;
|
||||
}
|
||||
|
||||
// Check that the sequences are not greater than what can be aligned using the 16 bits registers (as the LCS and alignment lengths are kept on 16 bits)
|
||||
if (lmax > SHRT_MAX)
|
||||
{
|
||||
obi_set_errno(OBI_ALIGN_ERROR);
|
||||
obidebug(1, "\nError: can not align sequences longer than %d (as the LCS and alignment lengths are kept on 16 bits)", SHRT_MAX);
|
||||
return 0; // TODO DOUBLE_MIN to flag error
|
||||
}
|
||||
|
||||
// If the score is expressed as a normalized distance, get the corresponding identity
|
||||
if (!similarity_mode && normalize)
|
||||
threshold = 1.0 - threshold;
|
||||
@ -774,13 +1021,13 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
|
||||
// Allocate space for matrix band if the alignment length must be computed
|
||||
if ((reference == ALILEN) && (normalize || !similarity_mode)) // cases in which alignment length must be computed
|
||||
{
|
||||
sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
|
||||
sizeToAllocateForBand = calculateSizeToAllocate(lmax, LCSmin);
|
||||
address = obi_get_memory_aligned_on_16(sizeToAllocateForBand, &shift);
|
||||
if (address == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError getting a memory address aligned on 16 bytes boundary");
|
||||
return 0; // TODO DOUBLE_MIN
|
||||
obidebug(1, "\nError getting a memory address aligned on a 16 bits boundary");
|
||||
return 0; // TODO DOUBLE_MIN to flag error
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,15 @@
|
||||
/*
|
||||
* sse_banded_LCS_alignment.h
|
||||
*
|
||||
* Created on: november 29, 2012
|
||||
* Author: mercier
|
||||
/****************************************************************************
|
||||
* LCS alignment of two sequences header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file sse_banded_LCS_alignment.h
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date November 7th 2012
|
||||
* @brief Header file for the functions handling the alignment of two sequences to compute their Longest Common Subsequence.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef SSE_BANDED_LCS_ALIGNMENT_H_
|
||||
#define SSE_BANDED_LCS_ALIGNMENT_H_
|
||||
|
||||
@ -15,13 +20,97 @@
|
||||
#include "obiblob.h"
|
||||
|
||||
|
||||
#define ALILEN (0) // TODO enum
|
||||
/**
|
||||
* @brief Macros for reference lengths to use when aligning.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Eric Coissac (eric.coissac@metabarcoding.org)
|
||||
*/
|
||||
#define ALILEN (0)
|
||||
#define MAXLEN (1)
|
||||
#define MINLEN (2)
|
||||
|
||||
// TODO doc
|
||||
int calculateLCSmin(int l1, int l2, double threshold, bool normalize, int reference, bool lcsmode);
|
||||
|
||||
/**
|
||||
* @brief Function calculating the minimum length of the Longest Common Subsequence between two sequences to be above a chosen score threshold.
|
||||
*
|
||||
* @warning The first argument (lmax) must correspond to the length of the longest sequence.
|
||||
*
|
||||
* @param lmax The length of the longest sequence to align.
|
||||
* @param lmin The length of the shortest sequence to align.
|
||||
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
|
||||
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||
*
|
||||
* @returns The minimum length of the Longest Common Subsequence between two sequences to be above the chosen score threshold.
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int calculateLCSmin(int lmax, int lmin, double threshold, bool normalize, int reference, bool similarity_mode);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function aligning two sequences.
|
||||
*
|
||||
* The alignment algorithm is a banded global alignment algorithm, a modified version of the classical Needleman and Wunsch algorithm,
|
||||
* and uses indices based on the length of the Longest Common Subsequence between the two sequences.
|
||||
*
|
||||
* Note: the sequences do not need to be ordered (e.g. with the longest sequence as first argument).
|
||||
*
|
||||
* @param seq1 A pointer on the character string corresponding to the first sequence.
|
||||
* @param seq2 A pointer on the character string corresponding to the second sequence.
|
||||
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
|
||||
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||
* @param ali_length A pointer on the int where the alignment length will be stored.
|
||||
*
|
||||
* @returns The alignment score (normalized according to the parameters).
|
||||
*
|
||||
* @since 2012
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function aligning two sequences encoded in obiblobs.
|
||||
*
|
||||
* The alignment algorithm is a banded global alignment algorithm, a modified version of the classical Needleman and Wunsch algorithm,
|
||||
* and uses indices based on the length of the Longest Common Subsequence between the two sequences.
|
||||
*
|
||||
* Note: the obiblobs do not need to be ordered (e.g. with the obiblob containing the longest sequence as first argument).
|
||||
*
|
||||
* @param seq1 A pointer on the blob containing the first sequence.
|
||||
* @param seq2 A pointer on the blob containing the second sequence.
|
||||
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length. // TODO
|
||||
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||
* @param lcs_length A pointer on the int where the LCS length will be stored.
|
||||
* @param ali_length A pointer on the int where the alignment length will be stored.
|
||||
*
|
||||
* @returns The alignment score (normalized according to the parameters).
|
||||
*
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double threshold, bool normalize, int reference, bool similarity_mode, int* lcs_length, int* ali_length);
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -8,8 +8,6 @@
|
||||
#include "obidmscolumn.h"
|
||||
#include "obiview.h"
|
||||
|
||||
//#include "../libutils/utilities.h"
|
||||
//#include "../libfasta/sequence.h"
|
||||
|
||||
|
||||
inline static uchar_v hash4m128(uchar_v frag)
|
||||
@ -242,7 +240,7 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t se
|
||||
|
||||
fprintf(stderr,"Building kmer tables...");
|
||||
|
||||
seq_count = (seq_col->header)->lines_used;
|
||||
seq_count = (view->infos)->line_count;
|
||||
|
||||
// Allocate memory for the table structure
|
||||
ktable = (Kmer_table_p) malloc(sizeof(Kmer_table_t) * seq_count);
|
||||
@ -267,6 +265,44 @@ Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t se
|
||||
}
|
||||
|
||||
|
||||
/*
 * Builds the kmer tables of two sequence columns and returns them as one
 * contiguous array: the tables of the first column come first, immediately
 * followed by the tables of the second column.
 *
 * view1/seq1_col/seq1_idx and view2/seq2_col/seq2_idx are forwarded unchanged
 * to hash_seq_column(). The number of tables per column is taken from the
 * line count of the corresponding view.
 *
 * Returns the concatenated array, or NULL if either table could not be built
 * or if the array could not be enlarged. On failure every table built so far
 * is released, so the caller has nothing to clean up.
 */
Kmer_table_p hash_two_seq_columns(Obiview_p view1, OBIDMS_column_p seq1_col, index_t seq1_idx,
								  Obiview_p view2, OBIDMS_column_p seq2_col, index_t seq2_idx)
{
	size_t       seq1_count;
	size_t       seq2_count;
	Kmer_table_p ktable1;
	Kmer_table_p ktable2;
	Kmer_table_p ktable;

	seq1_count = (view1->infos)->line_count;
	seq2_count = (view2->infos)->line_count;

	// Build the two tables then concatenate them
	ktable1 = hash_seq_column(view1, seq1_col, seq1_idx);
	if (ktable1 == NULL)
		return NULL;

	ktable2 = hash_seq_column(view2, seq2_col, seq2_idx);
	if (ktable2 == NULL)
	{
		// The first table was built successfully and must not be leaked
		// NOTE(review): assumes free_kmer_tables() also releases the array itself,
		// as its existing use on ktable2 below implies - confirm
		free_kmer_tables(ktable1, seq1_count);
		return NULL;
	}

	// Realloc to hold the 2 tables
	ktable = realloc(ktable1, sizeof(Kmer_table_t) * (seq1_count + seq2_count));
	if (ktable == NULL)
	{
		// A failed realloc leaves the original block allocated: free both tables
		free_kmer_tables(ktable1, seq1_count);
		free_kmer_tables(ktable2, seq2_count);
		return NULL;
	}

	// Concatenate
	memcpy(ktable+seq1_count, ktable2, sizeof(Kmer_table_t) * seq2_count);

	// Free only the array of the copied table: its entries now live in ktable
	free(ktable2);

	return ktable;
}
|
||||
|
||||
|
||||
void free_kmer_tables(Kmer_table_p ktable, size_t count)
|
||||
{
|
||||
size_t i;
|
||||
|
@ -18,7 +18,11 @@ typedef struct {
|
||||
} Kmer_table_t, *Kmer_table_p;
|
||||
|
||||
|
||||
// TODO doc
|
||||
|
||||
Kmer_table_p hash_seq_column(Obiview_p view, OBIDMS_column_p seq_col, index_t seq_idx);
|
||||
Kmer_table_p hash_two_seq_columns(Obiview_p view1, OBIDMS_column_p seq1_col, index_t seq1_idx,
|
||||
Obiview_p view2, OBIDMS_column_p seq2_col, index_t seq2_idx);
|
||||
void align_filters(Kmer_table_p ktable, Obi_blob_p seq1, Obi_blob_p seq2, index_t idx1, index_t idx2, double threshold, bool normalize, int reference, bool similarity_mode, double* score, int* LCSmin, bool can_be_identical);
|
||||
void free_kmer_tables(Kmer_table_p ktable, size_t count);
|
||||
|
||||
|
190
src/utils.c
190
src/utils.c
@ -116,3 +116,193 @@ void* obi_get_memory_aligned_on_16(int size, int* shift)
|
||||
return (memory);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* A generic implementation of binary search for the Linux kernel
|
||||
*
|
||||
* Copyright (C) 2008-2009 Ksplice, Inc.
|
||||
* Author: Tim Abbott <tabbott@ksplice.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; version 2.
|
||||
*/
|
||||
/*
 * Binary search over a sorted array, with an extra user pointer handed to
 * the comparison function on every call.
 *
 * key        object searched for, passed as first argument of cmp.
 * base       first element of the array to search.
 * num        number of elements in the array.
 * size       size in bytes of one element.
 * user_data  opaque pointer passed as third argument of cmp.
 * cmp        returns < 0 if key sorts before the element, > 0 if it sorts
 *            after it, and 0 if they match.
 *
 * Returns a pointer to a matching element, or NULL if there is none
 * (including when num is 0, in which case cmp is never called).
 */
void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data,
		int (*cmp)(const void *key, const void *elt, const void* user_data))
{
	// Byte-addressed view of the array: arithmetic on void* is not valid ISO C
	const char* elements = (const char*) base;
	size_t      start = 0;
	size_t      end = num;

	while (start < end)
	{
		// Written this way so that start + end can not overflow
		size_t      mid = start + (end - start) / 2;
		const char* candidate = elements + mid * size;
		int         result = cmp(key, candidate, user_data);

		if (result < 0)
			end = mid;
		else if (result > 0)
			start = mid + 1;
		else
			return (void*) candidate;
	}

	return NULL;
}
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
* Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
|
||||
*/
|
||||
|
||||
/* Smaller of two values. Both arguments may be evaluated twice. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))

/*
 * Exchange n bytes between the buffers parmi and parmj, one TYPE at a time.
 * n is expected to be a multiple of sizeof(TYPE); nothing is done when it
 * is smaller than one TYPE.
 */
#define swapcode(TYPE, parmi, parmj, n) {		\
	size_t i = (n) / sizeof (TYPE);			\
	TYPE *pi = (TYPE *) (parmi);			\
	TYPE *pj = (TYPE *) (parmj);			\
	for (; i > 0; --i) {				\
		TYPE t = *pi;				\
		*pi++ = *pj;				\
		*pj++ = t;				\
	}						\
}

/*
 * Choose how elements are swapped and store the choice in the local
 * variable swaptype:
 *   0: elements are exactly one long, long-aligned;
 *   1: elements are a multiple of long, long-aligned;
 *   2: anything else, swapped byte by byte.
 */
#define SWAPINIT(a, es) swaptype =					\
	(((char *)(a) - (char *)0) % sizeof(long) ||			\
	 (es) % sizeof(long)) ? 2 : ((es) == sizeof(long) ? 0 : 1)

/* Exchange n bytes between a and b, long-wise or byte-wise depending on swaptype. */
static __inline void
swapfunc(char *a, char *b, size_t n, int swaptype)
{
	if (swaptype <= 1)
		swapcode(long, a, b, n)
	else
		swapcode(char, a, b, n)
}

/*
 * Exchange the two elements a and b. Relies on the locals es and swaptype
 * of the enclosing function. Wrapped in do/while(0) so that it behaves as a
 * single statement wherever it is used.
 */
#define swap(a, b)						\
	do {							\
		if (swaptype == 0) {				\
			long t = *(long *)(a);			\
			*(long *)(a) = *(long *)(b);		\
			*(long *)(b) = t;			\
		} else						\
			swapfunc(a, b, es, swaptype);		\
	} while (0)

/* Exchange two runs of n bytes; does nothing when n is 0. */
#define vecswap(a, b, n)					\
	do {							\
		if ((n) > 0)					\
			swapfunc(a, b, n, swaptype);		\
	} while (0)

/* Return whichever of a, b, c is the median according to cmp. */
static __inline char *
med3(char *a, char *b, char *c, const void *user_data, int (*cmp)(const void *, const void *, const void *))
{
	return cmp(a, b, user_data) < 0 ?
	       (cmp(b, c, user_data) < 0 ? b : (cmp(a, c, user_data) < 0 ? c : a ))
	      :(cmp(b, c, user_data) > 0 ? b : (cmp(a, c, user_data) < 0 ? a : c ));
}

/*
 * Sort the n elements of es bytes each starting at aa, in place, ordering
 * them with cmp. user_data is passed unchanged as third argument of every
 * call to cmp.
 *
 * All byte counts and pointer distances are kept in size_t so that arrays
 * (or elements) larger than INT_MAX bytes are handled. An element size of 0
 * is rejected up front: there is nothing to order and the partition loop
 * could not make progress.
 */
void
qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *))
{
	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
	size_t d, r;
	int cmp_result, swaptype, swap_cnt;
	char *a = aa;

	if (es == 0)
		return;

loop:
	SWAPINIT(a, es);
	swap_cnt = 0;

	/* Small arrays: plain insertion sort */
	if (n < 7) {
		for (pm = a + es; pm < a + n * es; pm += es)
			for (pl = pm; pl > a && cmp(pl - es, pl, user_data) > 0;
			     pl -= es)
				swap(pl, pl - es);
		return;
	}

	/* Pivot selection: middle element, median of 3, or median of 3 medians */
	pm = a + (n / 2) * es;
	if (n > 7) {
		pl = a;
		pn = a + (n - 1) * es;
		if (n > 40) {
			d = (n / 8) * es;
			pl = med3(pl, pl + d, pl + 2 * d, user_data, cmp);
			pm = med3(pm - d, pm, pm + d, user_data, cmp);
			pn = med3(pn - 2 * d, pn - d, pn, user_data, cmp);
		}
		pm = med3(pl, pm, pn, user_data, cmp);
	}

	/* The pivot is parked in the first slot for the whole partition pass */
	swap(a, pm);
	pa = pb = a + es;
	pc = pd = a + (n - 1) * es;

	/*
	 * Partition. Elements equal to the pivot are collected at both ends
	 * ([a, pa) and (pd, end)), smaller ones in [pa, pb), greater ones in
	 * (pc, pd].
	 */
	for (;;) {
		while (pb <= pc && (cmp_result = cmp(pb, a, user_data)) <= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pa, pb);
				pa += es;
			}
			pb += es;
		}
		while (pb <= pc && (cmp_result = cmp(pc, a, user_data)) >= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pc, pd);
				pd -= es;
			}
			pc -= es;
		}
		if (pb > pc)
			break;
		swap(pb, pc);
		swap_cnt = 1;
		pb += es;
		pc -= es;
	}

	if (swap_cnt == 0) {  /* Switch to insertion sort */
		for (pm = a + es; pm < a + n * es; pm += es)
			for (pl = pm; pl > a && cmp(pl - es, pl, user_data) > 0;
			     pl -= es)
				swap(pl, pl - es);
		return;
	}

	/* Move the runs of pivot-equal elements from the ends to the middle */
	pn = a + n * es;
	r = MIN((size_t)(pa - a), (size_t)(pb - pa));
	vecswap(a, pb - r, r);
	/* pd never goes past pn - es, so the subtraction can not wrap */
	r = MIN((size_t)(pd - pc), (size_t)(pn - pd) - es);
	vecswap(pb, pn - r, r);

	/* Recurse on the "smaller" part */
	if ((r = (size_t)(pb - pa)) > es)
		qsort_user_data(a, r / es, es, user_data, cmp);

	if ((r = (size_t)(pd - pc)) > es) {
		/* Iterate rather than recurse to save stack space */
		a = pn - r;
		n = r / es;
		goto loop;
	}
}
|
||||
|
||||
|
37
src/utils.h
37
src/utils.h
@ -74,4 +74,41 @@ char* obi_format_date(time_t date);
|
||||
void* obi_get_memory_aligned_on_16(int size, int* shift);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Version of binary search modified to allow the user to provide an
|
||||
* additional pointer sent to the comparison function.
|
||||
*
|
||||
* @param key This is the pointer to the object that serves as key for the search, type-casted as a void*.
|
||||
* @param base This is the pointer to the first object of the array where the search is performed, type-casted as a void*.
|
||||
* @param num This is the number of elements in the array pointed by base.
|
||||
* @param size This is the size in bytes of each element in the array.
|
||||
* @param user_data This is an additional pointer passed to the comparison function.
|
||||
* @param cmp This is the function that compares the key with an element of the array, optionally using the additional pointer.
|
||||
*
|
||||
* @returns A pointer to an entry in the array that matches the search key.
|
||||
* @retval NULL if key is not found.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author original code modified by Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void* bsearch_user_data(const void* key, const void* base, size_t num, size_t size, const void* user_data,
|
||||
int (*cmp)(const void *key, const void *elt, const void* user_data));
|
||||
|
||||
|
||||
/**
|
||||
* @brief Version of quick sort modified to allow the user to provide an
|
||||
* additional pointer sent to the comparison function.
|
||||
*
|
||||
* @param aa This is the pointer to the first element of the array to be sorted.
|
||||
* @param n This is the number of elements in the array pointed to by aa.
|
||||
* @param es This is the size in bytes of each element in the array.
|
||||
* @param user_data This is an additional pointer passed to the comparison function.
|
||||
* @param cmp This is the function that compares two elements, optionally using the additional pointer.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author original code modified by Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void qsort_user_data(void *aa, size_t n, size_t es, const void *user_data, int (*cmp)(const void *, const void *, const void *));
|
||||
|
||||
|
||||
#endif /* UTILS_H_ */
|
||||
|
Reference in New Issue
Block a user