The default element names for columns with multiple elements per line

are now "0;1;2;...;n"
This commit is contained in:
Celine Mercier
2016-12-02 17:54:51 +01:00
parent e60497651c
commit 852e5488c8
5 changed files with 116 additions and 87 deletions

View File

@ -379,10 +379,10 @@ cdef class OBIView :
else :
alias_b = str2bytes(alias)
if nb_elements_per_line > 1 :
if elements_names is None :
elements_names_b = str2bytes("")
else :
elements_names_b = str2bytes(';'.join(elements_names))
elif nb_elements_per_line == 1 :
elements_names_b = column_name_b
if type : # TODO make C function that does that
if type == 'OBI_INT' :
@ -528,61 +528,44 @@ cdef class OBIView_NUC_SEQS(OBIView):
# TODO discuss
cpdef align(self, OBIView oview, OBIView iview2=None,
double threshold=0.0, bint normalize=True, int reference=0, bint similarity_mode=True) :
cdef OBIView iview1
cdef Obiview_p iview1_p
cdef Obiview_p iview2_p
cdef Obiview_p oview_p
cdef OBIDMS_column icol1
cdef OBIDMS_column_p icol1_p
cdef OBIDMS_column_p* icol1_pp
cdef OBIDMS_column id1_col
cdef OBIDMS_column_p id1_col_p
cdef OBIDMS_column_p* id1_col_pp
cdef OBIDMS_column id2_col
cdef OBIDMS_column_p id2_col_p
cdef OBIDMS_column_p* id2_col_pp
cdef OBIDMS_column ocol
cdef OBIDMS_column_p ocol_p
cdef OBIDMS_column_p* ocol_pp
cdef str id1_col_name
cdef str id2_col_name
cdef str score_col_name
id1_col_name = "ID1" # TODO discuss names, aliases
id2_col_name = "ID2"
score_col_name = "score"
iview1= self
iview1_p = iview1._pointer
icol1 = iview1[bytes2str(NUC_SEQUENCE_COLUMN)]
icol1_pp = icol1._pointer
icol1_p = icol1_pp[0]
oview.add_column(id1_col_name, type='OBI_STR', create=True)
oview.add_column(id2_col_name, type='OBI_STR', create=True)
oview.add_column(score_col_name, type='OBI_FLOAT', create=True)
oview_p = oview._pointer
ocol = oview[score_col_name]
ocol_pp = ocol._pointer
ocol_p = ocol_pp[0]
id1_col = oview[id1_col_name]
id2_col = oview[id2_col_name]
id1_col_pp = id1_col._pointer
id2_col_pp = id2_col._pointer
id1_col_p = id1_col_pp[0]
id2_col_p = id2_col_pp[0]
if obi_align_one_column(iview1_p, icol1_p, NULL, oview_p, id1_col_p, id2_col_p, ocol_p, threshold, normalize, reference, similarity_mode) < 0 :
raise Exception("Error aligning sequences")
pass
#
# cdef OBIView iview1
#
# cdef Obiview_p iview1_p
# cdef Obiview_p iview2_p
# cdef Obiview_p oview_p
#
# cdef OBIDMS_column icol1
# cdef OBIDMS_column_p icol1_p
# cdef OBIDMS_column_p* icol1_pp
#
# cdef OBIDMS_column id1_col
# cdef OBIDMS_column_p id1_col_p
# cdef OBIDMS_column_p* id1_col_pp
#
# cdef OBIDMS_column id2_col
# cdef OBIDMS_column_p id2_col_p
# cdef OBIDMS_column_p* id2_col_pp
#
# cdef OBIDMS_column ocol
# cdef OBIDMS_column_p ocol_p
# cdef OBIDMS_column_p* ocol_pp
#
# cdef str id1_col_name
# cdef str id2_col_name
# cdef str score_col_name
#
# score_col_name = "score"
#
# iview1= self
# iview1_p = iview1._pointer
# icol1 = iview1[bytes2str(NUC_SEQUENCE_COLUMN)]
# icol1_pp = icol1._pointer
# icol1_p = icol1_pp[0]
#
# if obi_align_one_column(iview1_p, icol1_p, threshold, normalize, reference, similarity_mode) < 0 :
# raise Exception("Error aligning sequences")
######################################################################################################

View File

@ -422,10 +422,18 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
/**
 * Stores the list of element names in the header of a column.
 *
 * @param column          The column whose header is updated.
 * @param elements_names  The element names as a single NUL-terminated string
 *                        (at most ELEMENTS_NAMES_MAX characters, terminator excluded).
 *
 * @returns 0 on success, -1 on error (NULL argument or names too long), in which
 *          case the OBI errno is set to OBICOL_UNKNOWN_ERROR and the header is
 *          left untouched.
 */
int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names)
{
	size_t names_length;

	// strlen() and the header dereference below are undefined on NULL pointers
	if ((column == NULL) || (column->header == NULL) || (elements_names == NULL))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError: NULL column or element names when setting the element names of a column");
		return -1;
	}

	names_length = strlen(elements_names);
	if (names_length > ELEMENTS_NAMES_MAX)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX);
		return -1;
	}

	// The length has been checked above, so copy exactly the string and its terminator
	memcpy((column->header)->elements_names, elements_names, names_length + 1);

	return 0;
}
index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
{
return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line);
@ -543,6 +551,37 @@ size_t obi_get_platform_header_size()
}
// TODO
/**
 * Builds the default element names of a column: the element indices separated
 * by ';' with no terminal ';' (e.g. "0;1;2" for 3 elements per line).
 *
 * @param nb_elements_per_line  The number of elements per line of the column.
 *
 * @returns A newly allocated, NUL-terminated string that the caller must free(),
 *          or NULL on error (too many elements for the default names, allocation
 *          failure or formatting failure), in which case the OBI errno is set.
 */
char* build_default_elements_names(index_t nb_elements_per_line)
{
	char*   elements_names;
	size_t  capacity;
	size_t  used;
	int     written;
	index_t i;

	// Validate before allocating so that the error path has nothing to release
	if (nb_elements_per_line > NB_ELTS_MAX_IF_DEFAULT_NAME)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError: too many elements per line to use the default names (max = %d elements)", NB_ELTS_MAX_IF_DEFAULT_NAME);
		return NULL;
	}

	// One extra byte for the terminal character
	capacity = ((size_t) ELEMENTS_NAMES_MAX) + 1;
	elements_names = (char*) malloc(capacity * sizeof(char));
	if (elements_names == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for elements names");
		return NULL;
	}

	// Start from a valid empty string so that 0 elements yields ""
	elements_names[0] = '\0';
	used = 0;

	for (i = 0; i < nb_elements_per_line; i++)
	{
		// Append at the current end of the string; every index but the first is preceded by the separator
		written = snprintf(elements_names + used, capacity - used, "%s%lld", (i > 0) ? ";" : "", (long long) i);
		if ((written < 0) || (((size_t) written) >= (capacity - used)))
		{
			free(elements_names);
			obi_set_errno(OBICOL_UNKNOWN_ERROR);
			obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX);
			return NULL;
		}
		used += (size_t) written;
	}

	return elements_names;
}
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const char* column_name,
OBIType_t data_type,
@ -574,16 +613,19 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
// Check that the informations given are not NULL/invalid/greater than the allowed sizes
if (dms == NULL)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nCan't create column because of invalid DMS");
return NULL;
}
if (column_name == NULL)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nCan't create column because of empty column name");
return NULL;
}
if ((data_type < 1) || (data_type > 8)) // TODO check in more robust way
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nCan't create column because of invalid data type");
return NULL;
}
@ -637,14 +679,15 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
else if (nb_lines < minimum_line_count)
nb_lines = minimum_line_count;
// The number of elements names should be equal to the number of elements per line
if ((elements_names == NULL) && (nb_elements_per_line > 1))
// Check and build if needed the element names
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0)) // Build the default element names: str of the element index
{
obidebug(1, "\nCan't create column because no elements names were given for a number of elements per line greater than 1");
return NULL;
elements_names = build_default_elements_names(nb_elements_per_line);
if (elements_names == NULL)
return NULL;
}
else if ((elements_names != NULL) && (nb_elements_per_line > 1))
{
else if (((elements_names == NULL) || (strcmp(elements_names, "") != 0)) && (nb_elements_per_line > 1))
{ // The number of elements names should be equal to the number of elements per line
char* token;
index_t n = 0;
token = strdup(elements_names);
@ -660,11 +703,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
return NULL;
}
}
else if ((nb_elements_per_line == 1) && (strcmp(elements_names, column_name) != 0)) // TODO Discuss, maybe just make it the column name
{
obidebug(1, "\nCan't create column because the element name does not match the column name");
return NULL;
}
// TODO what if 1 element and name specified? doc
// Calculate the size needed
header_size = obi_get_platform_header_size();
@ -1060,7 +1099,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
for (i=0; i<((line_selection->header)->lines_used); i++)
{
// Get the index in the line selection column
index = obi_column_get_index(line_selection, i);
index = obi_column_get_index_with_elt_idx(line_selection, i, 0);
// Copy the line at the index in the column to clone to the new column
memcpy((new_column->data)+(i*line_size), (column_to_clone->data)+(index*line_size), line_size);
}
@ -1590,7 +1629,7 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
}
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb) // TODO problem with some columns in a view being empty or shorter and triggering an error because they've been truncated when the view was closed. Fixed with obiview.c in update_lines() for now
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
{
if ((line_nb+1) > ((column->header)->line_count))
{

View File

@ -28,14 +28,17 @@
#include "obiblob_indexer.h"
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO
*/
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
*/
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
*/
#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
* are used ("0;1;2;...;n"), considering ELEMENTS_NAMES_MAX.
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO
*/
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
*/
/**
@ -64,7 +67,8 @@ typedef struct OBIDMS_column_header {
index_t nb_elements_per_line; /**< Number of elements per line.
*/
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with ';' as separator
* (should be the column name if one element per line).
* (no terminal ';').
* (default are the indices: "0;1;2;...;n").
*/
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
* element from the column.
@ -179,7 +183,8 @@ size_t obi_get_platform_header_size();
* @param data_type The OBIType code of the data.
* @param nb_lines The number of lines to be stored.
* @param nb_elements_per_line The number of elements per line. // TODO talk about default values
* @param elements_names The names of the elements with ';' as separator.
* @param elements_names The names of the elements with ';' as separator (no terminal ';'),
* NULL or "" if the default names are to be used ("0;1;2;...;n").
* @param indexer_name The name of the indexer if there is one associated with the column.
* If NULL or "", the indexer name is set as the column name.
* @param associated_column_name The name of the associated column if there is one.

View File

@ -1236,7 +1236,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
// If there is a new line selection, build it by combining it with the one from the view to clone if there is one
else if (line_selection != NULL)
{
view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, LINES_COLUMN_NAME, NULL, NULL, -1, NULL);
view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL);
if ((view->line_selection) == NULL)
{
obidebug(1, "\nError creating a column corresponding to a line selection");
@ -1440,19 +1440,19 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
if (view_to_clone == NULL)
{
// Adding sequence column
if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NUC_SEQUENCE_COLUMN, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", NULL, -1, "Nucleotide sequences", true) < 0)
if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NUC_SEQUENCE_COLUMN, OBI_SEQ, 0, 1, NULL, "", NULL, -1, "Nucleotide sequences", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Adding id column
if (obi_view_add_column(view, ID_COLUMN, -1, ID_COLUMN, OBI_STR, 0, 1, ID_COLUMN, "", NULL, -1, "Ids", true) < 0)
if (obi_view_add_column(view, ID_COLUMN, -1, ID_COLUMN, OBI_STR, 0, 1, NULL, "", NULL, -1, "Ids", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Adding definition column
if (obi_view_add_column(view, DEFINITION_COLUMN, -1, DEFINITION_COLUMN, OBI_STR, 0, 1, DEFINITION_COLUMN, "", NULL, -1, "Definitions", true) < 0)
if (obi_view_add_column(view, DEFINITION_COLUMN, -1, DEFINITION_COLUMN, OBI_STR, 0, 1, NULL, "", NULL, -1, "Definitions", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
@ -1461,7 +1461,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
if (quality_column)
{
associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN);
if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, NULL, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;

View File

@ -299,8 +299,10 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
* @param data_type The OBIType code of the data.
* @param nb_lines The number of lines to be stored.
* @param nb_elements_per_line The number of elements per line.
* @param elements_names The names of the elements with ';' as separator.
* @param elements_names The names of the elements with ';' as separator (no terminal ';'),
* NULL or "" if the default names are to be used ("0;1;2;...;n").
* @param indexer_name The name of the indexer if there is one associated with the column.
* If NULL or "", the indexer name is set as the column name.
* @param comments Optional comments associated with the column.
* @param create Whether the column should be created (create == true) or opened (create == false).
*