From 852e5488c83c104485a727e1916bb7ac548602c6 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Fri, 2 Dec 2016 17:54:51 +0100 Subject: [PATCH] The default element names for columns with multiple elements per line are now "0;1;2;...;n" --- python/obitools3/obidms/_obidms.pyx | 99 ++++++++++++----------------- src/obidmscolumn.c | 65 +++++++++++++++---- src/obidmscolumn.h | 25 +++++--- src/obiview.c | 10 +-- src/obiview.h | 4 +- 5 files changed, 116 insertions(+), 87 deletions(-) diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx index 98022b4..aaeba5d 100644 --- a/python/obitools3/obidms/_obidms.pyx +++ b/python/obitools3/obidms/_obidms.pyx @@ -379,10 +379,10 @@ cdef class OBIView : else : alias_b = str2bytes(alias) - if nb_elements_per_line > 1 : + if elements_names is None : + elements_names_b = str2bytes("") + else : elements_names_b = str2bytes(';'.join(elements_names)) - elif nb_elements_per_line == 1 : - elements_names_b = column_name_b if type : # TODO make C function that does that if type == 'OBI_INT' : @@ -528,61 +528,44 @@ cdef class OBIView_NUC_SEQS(OBIView): # TODO discuss cpdef align(self, OBIView oview, OBIView iview2=None, double threshold=0.0, bint normalize=True, int reference=0, bint similarity_mode=True) : - - cdef OBIView iview1 - - cdef Obiview_p iview1_p - cdef Obiview_p iview2_p - cdef Obiview_p oview_p - - cdef OBIDMS_column icol1 - cdef OBIDMS_column_p icol1_p - cdef OBIDMS_column_p* icol1_pp - - cdef OBIDMS_column id1_col - cdef OBIDMS_column_p id1_col_p - cdef OBIDMS_column_p* id1_col_pp - - cdef OBIDMS_column id2_col - cdef OBIDMS_column_p id2_col_p - cdef OBIDMS_column_p* id2_col_pp - - cdef OBIDMS_column ocol - cdef OBIDMS_column_p ocol_p - cdef OBIDMS_column_p* ocol_pp - - cdef str id1_col_name - cdef str id2_col_name - cdef str score_col_name - - id1_col_name = "ID1" # TODO discuss names, aliases - id2_col_name = "ID2" - score_col_name = "score" - - iview1= self - iview1_p = iview1._pointer - 
icol1 = iview1[bytes2str(NUC_SEQUENCE_COLUMN)] - icol1_pp = icol1._pointer - icol1_p = icol1_pp[0] - - oview.add_column(id1_col_name, type='OBI_STR', create=True) - oview.add_column(id2_col_name, type='OBI_STR', create=True) - oview.add_column(score_col_name, type='OBI_FLOAT', create=True) - - oview_p = oview._pointer - ocol = oview[score_col_name] - ocol_pp = ocol._pointer - ocol_p = ocol_pp[0] - - id1_col = oview[id1_col_name] - id2_col = oview[id2_col_name] - id1_col_pp = id1_col._pointer - id2_col_pp = id2_col._pointer - id1_col_p = id1_col_pp[0] - id2_col_p = id2_col_pp[0] - - if obi_align_one_column(iview1_p, icol1_p, NULL, oview_p, id1_col_p, id2_col_p, ocol_p, threshold, normalize, reference, similarity_mode) < 0 : - raise Exception("Error aligning sequences") + pass +# +# cdef OBIView iview1 +# +# cdef Obiview_p iview1_p +# cdef Obiview_p iview2_p +# cdef Obiview_p oview_p +# +# cdef OBIDMS_column icol1 +# cdef OBIDMS_column_p icol1_p +# cdef OBIDMS_column_p* icol1_pp +# +# cdef OBIDMS_column id1_col +# cdef OBIDMS_column_p id1_col_p +# cdef OBIDMS_column_p* id1_col_pp +# +# cdef OBIDMS_column id2_col +# cdef OBIDMS_column_p id2_col_p +# cdef OBIDMS_column_p* id2_col_pp +# +# cdef OBIDMS_column ocol +# cdef OBIDMS_column_p ocol_p +# cdef OBIDMS_column_p* ocol_pp +# +# cdef str id1_col_name +# cdef str id2_col_name +# cdef str score_col_name +# +# score_col_name = "score" +# +# iview1= self +# iview1_p = iview1._pointer +# icol1 = iview1[bytes2str(NUC_SEQUENCE_COLUMN)] +# icol1_pp = icol1._pointer +# icol1_p = icol1_pp[0] +# +# if obi_align_one_column(iview1_p, icol1_p, threshold, normalize, reference, similarity_mode) < 0 : +# raise Exception("Error aligning sequences") ###################################################################################################### diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c index dc53adf..6bd47b5 100644 --- a/src/obidmscolumn.c +++ b/src/obidmscolumn.c @@ -422,10 +422,18 @@ static obiversion_t 
create_version_file(OBIDMS_column_directory_p column_directo int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names) { + if (strlen(elements_names) > ELEMENTS_NAMES_MAX) + { + obi_set_errno(OBICOL_UNKNOWN_ERROR); + obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX); + return -1; + } + strcpy((column->header)->elements_names, elements_names); return 0; } + index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line) { return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line); @@ -543,6 +551,37 @@ size_t obi_get_platform_header_size() } +// TODO +char* build_default_elements_names(index_t nb_elements_per_line) +{ + char* elements_names; + int i; + + elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char)); + if (elements_names == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for elements names"); + return NULL; + } + + if (nb_elements_per_line > NB_ELTS_MAX_IF_DEFAULT_NAME) + { + obi_set_errno(OBICOL_UNKNOWN_ERROR); + obidebug(1, "\nError: too many elements per line to use the default names (max = %d elements)", NB_ELTS_MAX_IF_DEFAULT_NAME); + return NULL; + } + + for (i= 0; i < nb_elements_per_line; i++) + sprintf(elements_names, "%d", i); + + // Terminal character + elements_names[strlen(elements_names)] = '\0'; + + return elements_names; +} + + OBIDMS_column_p obi_create_column(OBIDMS_p dms, const char* column_name, OBIType_t data_type, @@ -574,16 +613,19 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, // Check that the informations given are not NULL/invalid/greater than the allowed sizes if (dms == NULL) { + obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nCan't create column because of invalid DMS"); return NULL; } if (column_name == NULL) { + obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nCan't create column because of empty column name"); return NULL; } if ((data_type < 1) || (data_type > 8)) // TODO check in 
more robust way { + obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nCan't create column because of invalid data type"); return NULL; } @@ -637,14 +679,15 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, else if (nb_lines < minimum_line_count) nb_lines = minimum_line_count; - // The number of elements names should be equal to the number of elements per line - if ((elements_names == NULL) && (nb_elements_per_line > 1)) + // Check and build if needed the element names + if ((elements_names == NULL) || (strcmp(elements_names, "") == 0)) // Build the default element names: str of the element index { - obidebug(1, "\nCan't create column because no elements names were given for a number of elements per line greater than 1"); - return NULL; + elements_names = build_default_elements_names(nb_elements_per_line); + if (elements_names == NULL) + return NULL; } - else if ((elements_names != NULL) && (nb_elements_per_line > 1)) - { + else if (((elements_names == NULL) || (strcmp(elements_names, "") != 0)) && (nb_elements_per_line > 1)) + { // The number of elements names should be equal to the number of elements per line char* token; index_t n = 0; token = strdup(elements_names); @@ -660,11 +703,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, return NULL; } } - else if ((nb_elements_per_line == 1) && (strcmp(elements_names, column_name) != 0)) // TODO Discuss, maybe just make it the column name - { - obidebug(1, "\nCan't create column because the element name does not match the column name"); - return NULL; - } + // TODO what if 1 element and name specified? 
doc // Calculate the size needed header_size = obi_get_platform_header_size(); @@ -1060,7 +1099,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms, for (i=0; i<((line_selection->header)->lines_used); i++) { // Get the index in the line selection column - index = obi_column_get_index(line_selection, i); + index = obi_column_get_index_with_elt_idx(line_selection, i, 0); // Copy the line at the index in the column to clone to the new column memcpy((new_column->data)+(i*line_size), (column_to_clone->data)+(index*line_size), line_size); } @@ -1590,7 +1629,7 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb) } -int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb) // TODO problem with some columns in a view being empty or shorter and triggering an error because they've been truncated when the view was closed. Fixed with obiview.c in update_lines() for now +int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb) { if ((line_nb+1) > ((column->header)->line_count)) { diff --git a/src/obidmscolumn.h b/src/obidmscolumn.h index 4005af9..05a5db9 100644 --- a/src/obidmscolumn.h +++ b/src/obidmscolumn.h @@ -28,14 +28,17 @@ #include "obiblob_indexer.h" -#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss - */ -#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged. - */ -#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO - */ -#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments. - */ +#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss + */ +#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names + * are used ("0;1;2;...;n"), considering ELEMENTS_NAMES_MAX. + */ +#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged. 
+ */ +#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO + */ +#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments. + */ /** @@ -64,7 +67,8 @@ typedef struct OBIDMS_column_header { index_t nb_elements_per_line; /**< Number of elements per line. */ char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with ';' as separator - * (should be the column name if one element per line). + * (no terminal ';'). + * (default are the indices: "0;1;2;...;n"). */ OBIType_t returned_data_type; /**< Type of the data that is returned when getting an * element from the column. @@ -179,7 +183,8 @@ size_t obi_get_platform_header_size(); * @param data_type The OBIType code of the data. * @param nb_lines The number of lines to be stored. * @param nb_elements_per_line The number of elements per line. // TODO talk about default values - * @param elements_names The names of the elements with ';' as separator. + * @param elements_names The names of the elements with ';' as separator (no terminal ';'), + * NULL or "" if the default names are to be used ("0;1;2;...;n"). * @param indexer_name The name of the indexer if there is one associated with the column. * If NULL or "", the indexer name is set as the column name. * @param associated_column_name The name of the associated column if there is one. 
diff --git a/src/obiview.c b/src/obiview.c index f54d4f2..4014a1e 100644 --- a/src/obiview.c +++ b/src/obiview.c @@ -1236,7 +1236,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl // If there is a new line selection, build it by combining it with the one from the view to clone if there is one else if (line_selection != NULL) { - view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, LINES_COLUMN_NAME, NULL, NULL, -1, NULL); + view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, NULL, NULL, -1, NULL); if ((view->line_selection) == NULL) { obidebug(1, "\nError creating a column corresponding to a line selection"); @@ -1440,19 +1440,19 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v if (view_to_clone == NULL) { // Adding sequence column - if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NUC_SEQUENCE_COLUMN, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", NULL, -1, "Nucleotide sequences", true) < 0) + if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NUC_SEQUENCE_COLUMN, OBI_SEQ, 0, 1, NULL, "", NULL, -1, "Nucleotide sequences", true) < 0) { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; } // Adding id column - if (obi_view_add_column(view, ID_COLUMN, -1, ID_COLUMN, OBI_STR, 0, 1, ID_COLUMN, "", NULL, -1, "Ids", true) < 0) + if (obi_view_add_column(view, ID_COLUMN, -1, ID_COLUMN, OBI_STR, 0, 1, NULL, "", NULL, -1, "Ids", true) < 0) { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; } // Adding definition column - if (obi_view_add_column(view, DEFINITION_COLUMN, -1, DEFINITION_COLUMN, OBI_STR, 0, 1, DEFINITION_COLUMN, "", NULL, -1, "Definitions", true) < 0) + if (obi_view_add_column(view, DEFINITION_COLUMN, -1, DEFINITION_COLUMN, OBI_STR, 0, 1, NULL, "", NULL, -1, "Definitions", true) < 0) { obidebug(1, "Error adding an obligatory column 
in a nucleotide sequences view"); return NULL; @@ -1461,7 +1461,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v if (quality_column) { associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN); - if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association + if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, NULL, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; diff --git a/src/obiview.h b/src/obiview.h index df842da..eb4df57 100644 --- a/src/obiview.h +++ b/src/obiview.h @@ -299,8 +299,10 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name); * @param data_type The OBIType code of the data. * @param nb_lines The number of lines to be stored. * @param nb_elements_per_line The number of elements per line. - * @param elements_names The names of the elements with ';' as separator. + * @param elements_names The names of the elements with ';' as separator (no terminal ';'), + * NULL or "" if the default names are to be used ("0;1;2;...;n"). * @param indexer_name The name of the indexer if there is one associated with the column. + * If NULL or "", the indexer name is set as the column name. * @param comments Optional comments associated with the column. * @param create Whether the column should be created (create == true) or opened (create == false). *