Major update: Column aliases. Columns are now identified in the context

of a view by an alias that can be modified.
This commit is contained in:
Celine Mercier
2016-08-01 18:25:30 +02:00
parent 3843485a04
commit 312f50ff0f
17 changed files with 760 additions and 114 deletions

View File

@ -30,6 +30,7 @@
#include "obierrno.h"
#include "obidebug.h"
#include "obilittlebigman.h"
#include "hashtable.h"
#include "utils.h"
@ -42,6 +43,16 @@
*
**************************************************************************/
/**
* Internal function calculating the size of the file where the information about an obiview is stored.
*
* @returns The size of the file in bytes.
*
* @since June 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t get_platform_view_file_size();
/**
* Internal function building the file name where the information about an obiview is stored.
@ -82,6 +93,14 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name);
* The column references stored in the mapped view infos structures are updated
* to match the columns opened in the opened view structure.
*
* @warning The column pointer array should be up to date before using this function.
* @warning Aliases are not updated by this function and have to be edited separately.
* This function simply reads the column pointer array associated with the view
* and fills the column names and versions in the column reference array accordingly,
* without touching the alias.
* That means that for example if there is a shift in the column pointer array, this
* function should not be used.
*
* @param view A pointer on the view.
*
* @since June 2016
@ -90,6 +109,69 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name);
void update_column_refs(Obiview_p view);
/**
* @brief Internal function creating the column dictionary associated with a view.
*
* The column dictionary is built from the column references array, and associates each column alias
* with the pointer on the column.
*
* @warning The column reference array and the column pointer array should be up to date before using this function.
*
* @param view A pointer on the view.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int create_column_dict(Obiview_p view);
/**
* @brief Internal function updating the column dictionary associated with a view.
*
* The existing column dictionary is freed, then rebuilt from scratch from the column references array,
* associating each column alias with the pointer on the column.
*
* @warning The column reference array and the column pointer array should be up to date before using this function.
*
* @param view A pointer on the view.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int update_column_dict(Obiview_p view);
/**
* @brief Internal function updating the column reference array and the column dictionary associated with a view.
*
* The column reference array is updated from the column pointer array, then the column dictionary that
* associates each column alias with the pointer on the column is updated from the column reference array.
*
* @warning The column pointer array should be up to date before using this function.
* @warning Aliases are not updated by this function and have to be edited separately.
* This function simply reads the column pointer array associated with the view
* and fills the column names and versions in the column reference array accordingly,
* without touching the alias.
* That means that for example if there is a shift in the column pointer array, this
* function should not be used.
*
* @param view A pointer on the view.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int update_column_refs_and_dict(Obiview_p view);
/**
* @brief Internal function to update the line count in the context of a view.
*
@ -368,19 +450,72 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name)
}
/*
 * Refreshes the column references stored in the view infos from the column
 * pointer array of the view.
 *
 * For each of the (view->infos)->column_count columns, the name and version
 * read from the header of the opened column are written in the 'column_refs'
 * member of the matching column reference. The alias stored next to the
 * reference is not modified here.
 *
 * @param view A pointer on the view.
 */
void update_column_refs(Obiview_p view)
{
    int i;

    for (i=0; i < (view->infos)->column_count; i++)
    {
        // Name and version live in the 'column_refs' member of each reference (the alias is a separate member)
        strcpy(((((view->infos)->column_references)[i]).column_refs).column_name, (((view->columns)[i])->header)->name);
        ((((view->infos)->column_references)[i]).column_refs).version = (((view->columns)[i])->header)->version;
    }
}
/*
 * Builds the column dictionary of a view: a hash table created with
 * MAX_NB_OPENED_COLUMNS as its size argument, in which each column alias read
 * from the column references is associated with the column pointer found at
 * the same index in the column pointer array.
 *
 * @param view A pointer on the view.
 *
 * @retval 0 if the dictionary was created and filled.
 * @retval -1 if the table could not be created, if an alias is already
 *            present in the table, or if an insertion failed
 *            (the error number is set to OBIVIEW_ERROR in all three cases).
 */
int create_column_dict(Obiview_p view)
{
    int col;

    view->column_dict = ht_create(MAX_NB_OPENED_COLUMNS);
    if (view->column_dict == NULL)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError creating a column dictionary");
        return -1;
    }

    // Walk the column references and register each alias with the pointer stored at the same index
    col = 0;
    while (col < view->infos->column_count)
    {
        // An alias already present in the table means that two columns share the same identifier
        if (ht_get(view->column_dict, view->infos->column_references[col].alias) != NULL)
        {
            obi_set_errno(OBIVIEW_ERROR);
            obidebug(1, "\nError: the name/alias identifying a column in a view is not unique");
            return -1;
        }

        if (ht_set(view->column_dict, view->infos->column_references[col].alias, view->columns[col]) < 0)
        {
            obi_set_errno(OBIVIEW_ERROR);
            obidebug(1, "\nError adding a column in a column dictionary");
            return -1;
        }

        col++;
    }

    return 0;
}
/*
 * Rebuilds the column dictionary of a view from scratch: the current
 * dictionary is freed, then a new one is created with create_column_dict().
 *
 * @param view A pointer on the view.
 *
 * @retval 0 if the dictionary was rebuilt.
 * @retval -1 if create_column_dict() reported an error.
 */
int update_column_dict(Obiview_p view)
{
    // Drop the current table before building the new one
    ht_free(view->column_dict);

    return (create_column_dict(view) < 0) ? -1 : 0;
}
/*
 * Updates the column references of a view with update_column_refs(), then
 * rebuilds its column dictionary with update_column_dict().
 *
 * @param view A pointer on the view.
 *
 * @returns The value returned by update_column_dict().
 */
int update_column_refs_and_dict(Obiview_p view)
{
    int dict_status;

    // References first: the dictionary is rebuilt afterwards
    update_column_refs(view);
    dict_status = update_column_dict(view);

    return dict_status;
}
int update_lines(Obiview_p view, index_t line_count)
{
int i;
@ -408,6 +543,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
OBIDMS_column_p current_line_selection = NULL;
OBIDMS_column_p column = NULL;
OBIDMS_column_p column_buffer;
bool found;
// Check that the view is not read-only
if (view->read_only)
@ -422,9 +558,10 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
else
current_line_selection = view->line_selection;
found = false;
for (i=0; i<((view->infos)->column_count); i++)
{
if ((current_line_selection != NULL) || (!(strcmp((((view->columns)[i])->header)->name, column_name))))
if ((current_line_selection != NULL) || (!strcmp((((view->infos)->column_references)[i]).alias, column_name)))
{ // Clone with the right line selection and replace (for all columns if there is a line selection)
// Save pointer to close column after cloning
@ -442,7 +579,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
// Close old cloned column
obi_close_column(column_buffer);
if (!(strcmp((((view->columns)[i])->header)->name, column_name)))
if (!strcmp((((view->infos)->column_references)[i]).alias, column_name))
{ // Found the column to return
column = (view->columns)[i];
}
@ -464,8 +601,8 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
view->new_line_selection = NULL;
}
// Update column references in view infos
update_column_refs(view);
// Update column refs and dict
update_column_refs_and_dict(view);
return column;
}
@ -473,7 +610,8 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
{
char* column_name;
int i;
char* column_name = NULL;
// Check that the view is not read-only
if (view->read_only)
@ -491,22 +629,25 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
if (view->line_selection != NULL)
(*line_nb_p) = *(((index_t*) ((view->line_selection)->data)) + (*line_nb_p));
column_name = (char*) malloc(strlen(((*column_pp)->header)->name) * sizeof(char));
// Get the name/alias of the column from the pointer
for (i=0; i<((view->infos)->column_count); i++)
{
if (obi_view_get_column(view, (((view->infos)->column_references)[i]).alias) == *column_pp)
column_name = (((view->infos)->column_references)[i]).alias;
}
if (column_name == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError trying to allocate memory for a column name");
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError trying to clone a column in a view: column alias not found from pointer");
return -1;
}
strcpy(column_name, ((*column_pp)->header)->name);
(*column_pp) = clone_column_in_view(view, column_name);
if ((*column_pp) == NULL)
{
obidebug(1, "\nError trying to clone a column to modify it");
return -1;
}
free(column_name);
}
if (((*line_nb_p)+1) > (view->infos)->line_count)
@ -797,6 +938,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
}
view->dms = dms;
view->read_only = 0;
// Create view file
if (create_obiview_file(dms, view_name) < 0)
@ -888,24 +1030,12 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
(view->infos)->line_count = (view_to_clone->infos)->line_count;
}
for (i=0; i<((view_to_clone->infos)->column_count); i++)
{
(view->columns)[i] = obi_open_column(dms, (((view_to_clone->columns)[i])->header)->name, (((view_to_clone->columns)[i])->header)->version);
if ((view->columns)[i] == NULL)
{
if (view->line_selection != NULL)
obi_close_column(view->line_selection);
obi_view_unmap_file(view->dms, view->infos);
free(view);
return NULL;
}
}
(view->infos)->column_count = (view_to_clone->infos)->column_count;
// Fill informations
strcpy((view->infos)->view_type, (view_to_clone->infos)->view_type);
strcpy((view->infos)->created_from, (view_to_clone->infos)->name);
view->new_line_selection = NULL;
}
// Else, fill empty view structure
else
{
@ -919,10 +1049,10 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
//view->columns = NULL; // TODO
}
// Fill last informations
strcpy((view->infos)->name, view_name);
strcpy((view->infos)->comments, comments);
(view->infos)->creation_date = time(NULL);
view->read_only = 0;
view->nb_predicates = 0;
view->predicate_functions = NULL;
@ -938,8 +1068,44 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
((view->infos)->line_selection).version = ((view->line_selection)->header)->version;
}
// Store references for columns
update_column_refs(view);
// Create the column dictionary (hash table) associating column names (or aliases) to column pointers
if (create_column_dict(view) < 0)
{
obi_close_view(view);
return NULL;
}
// Once the view has been created with all its elements and informations, add the columns if the view is cloned from another view
// Add the columns from the view to clone in the new view
if (view_to_clone != NULL)
{
(view->infos)->column_count = 0;
for (i=0; i<((view_to_clone->infos)->column_count); i++)
{
if (obi_view_add_column(view,
(((view_to_clone->columns)[i])->header)->name,
(((view_to_clone->columns)[i])->header)->version,
(((view_to_clone->infos)->column_references)[i]).alias,
0,
(view->infos)->line_count,
0,
NULL,
NULL,
NULL,
-1,
NULL,
false)
< 0)
{
obidebug(1, "\nError adding a column in a new view from a view to clone");
if (view->line_selection != NULL)
obi_close_column(view->line_selection);
obi_view_unmap_file(view->dms, view->infos);
free(view);
return NULL;
}
}
}
return view;
}
@ -985,26 +1151,26 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
if (view_to_clone == NULL)
{
// Adding sequence column
if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", NULL, -1, "Nucleotide sequences", true) < 0)
if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NUC_SEQUENCE_COLUMN, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", NULL, -1, "Nucleotide sequences", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Adding id column
if (obi_view_add_column(view, ID_COLUMN, -1, OBI_STR, 0, 1, ID_COLUMN, "", NULL, -1, "Ids", true) < 0)
if (obi_view_add_column(view, ID_COLUMN, -1, ID_COLUMN, OBI_STR, 0, 1, ID_COLUMN, "", NULL, -1, "Ids", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Adding definition column
if (obi_view_add_column(view, DEFINITION_COLUMN, -1, OBI_STR, 0, 1, DEFINITION_COLUMN, "", NULL, -1, "Definitions", true) < 0)
if (obi_view_add_column(view, DEFINITION_COLUMN, -1, DEFINITION_COLUMN, OBI_STR, 0, 1, DEFINITION_COLUMN, "", NULL, -1, "Definitions", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Adding quality column
associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN);
if (obi_view_add_column(view, QUALITY_COLUMN, -1, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
@ -1131,8 +1297,11 @@ int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
{
Obiview_p view;
int i;
Obiview_p view;
const char* column_name;
obiversion_t column_version;
OBIDMS_column_p column_pointer;
int i;
// Allocate the memory for the view structure
view = (Obiview_p) malloc(sizeof(Obiview_t));
@ -1164,13 +1333,18 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
// Open the columns to read
for (i=0; i < ((view->infos)->column_count); i++)
{
(view->columns)[i] = obi_open_column(dms, (((view->infos)->column_references)+i)->column_name, (((view->infos)->column_references)+i)->version);
if ((view->columns)[i] == NULL)
column_name = ((((view->infos)->column_references)[i]).column_refs).column_name;
column_version = ((((view->infos)->column_references)[i]).column_refs).version;
column_pointer = obi_open_column(dms, column_name, column_version);
if (column_pointer == NULL)
{
obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, (((view->infos)->column_references)+i)->column_name, (((view->infos)->column_references)+i)->version);
obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, column_name, column_version);
obi_close_view(view);
return NULL;
}
(view->columns)[i] = column_pointer;
}
view->dms = dms;
@ -1179,6 +1353,14 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
view->nb_predicates = 0;
view->predicate_functions = NULL;
// Create the column dictionary associating each column alias with its pointer
if (create_column_dict(view) < 0)
{
obidebug(1, "\nError creating the column dictionary when opening a view");
obi_close_view(view);
return NULL;
}
return view;
}
@ -1186,6 +1368,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
int obi_view_add_column(Obiview_p view,
const char* column_name,
obiversion_t version_number,
const char* alias,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
@ -1196,10 +1379,10 @@ int obi_view_add_column(Obiview_p view,
const char* comments,
bool create) // all infos for creation or open
{
int i;
OBIDMS_column_p column;
OBIDMS_column_p column_buffer;
OBIDMS_column_p current_line_selection;
int i;
OBIDMS_column_p column;
OBIDMS_column_p column_buffer;
OBIDMS_column_p current_line_selection;
// Check that the view is not read-only
if (view->read_only)
@ -1275,14 +1458,29 @@ int obi_view_add_column(Obiview_p view,
return -1;
}
// Store column in the view
// Store column pointer in the view structure
(view->columns)[(view->infos)->column_count] = column;
// If an alias is not defined, it's the original name of the column. // TODO discuss
if (alias == NULL)
alias = column_name;
// Save column alias
strcpy((((view->infos)->column_references)[(view->infos)->column_count]).alias, alias);
(view->infos)->column_count++;
if ((view->infos)->column_count == 1) // first column in the view
(view->infos)->line_count = (column->header)->lines_used;
(view->infos)->line_count = nb_lines;
// Update reference in view infos
update_column_refs(view);
// Update column references and dictionary
update_column_refs_and_dict(view);
// // Print dict
// for (i=0; i<((view->infos)->column_count); i++)
// {
// fprintf(stderr, "\n\nalias: %s", (((view->infos)->column_references)[i]).alias);
// fprintf(stderr, "\npointer: %x\n", obi_view_get_column(view, (((view->infos)->column_references)[i]).alias));
// }
return 0;
}
@ -1303,59 +1501,107 @@ int obi_view_delete_column(Obiview_p view, const char* column_name)
return -1;
}
found = 0;
found = false;
for (i=0; i<((view->infos)->column_count); i++)
{
if (!strcmp((((view->columns)[i])->header)->name, column_name))
if ((!found) && (!strcmp((((view->infos)->column_references)[i]).alias, column_name)))
{
obi_close_column((view->columns)[i]);
found = 1;
found = true;
}
if (found)
{
if (i != (((view->infos)->column_count) - 1)) // not the last one
{ // Shift the pointer and the references
(view->columns)[i] = (view->columns)[i+1];
strcpy((((view->infos)->column_references)[i]).alias, (((view->infos)->column_references)[i+1]).alias);
strcpy(((((view->infos)->column_references)[i]).column_refs).column_name, ((((view->infos)->column_references)[i+1]).column_refs).column_name);
((((view->infos)->column_references)[i]).column_refs).version = ((((view->infos)->column_references)[i+1]).column_refs).version;
}
else // Last column
(view->columns)[i] = NULL;
}
}
if (!found)
{
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError trying to delete a column: column not found");
return -1;
}
((view->infos)->column_count)--;
// Update reference in view infos
update_column_refs(view);
// Update column dictionary
update_column_dict(view);
return 0;
}
/*
 * Looks up a column in the column dictionary of a view.
 *
 * @param view A pointer on the view.
 * @param column_name The key used for the lookup in the column dictionary
 *                    (the dictionary is filled with column aliases).
 *
 * @returns The value returned by ht_get() for that key, cast to a column pointer.
 */
OBIDMS_column_p obi_view_get_column(Obiview_p view, const char* column_name)
{
    OBIDMS_column_p column;

    column = (OBIDMS_column_p) ht_get(view->column_dict, column_name);

    return column;
}
/*
 * Returns the address of the slot holding a column pointer in the column
 * pointer array of a view, the column being identified by its alias.
 *
 * The column references are scanned in order and the first reference whose
 * alias is equal to column_name is selected.
 *
 * @param view A pointer on the view.
 * @param column_name The alias identifying the column in the view.
 *
 * @returns A pointer on the matching element of view->columns.
 * @retval NULL if no column reference has this alias (a debug message is printed).
 */
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const char* column_name)
{
    int i;

    for (i=0; i < (view->infos)->column_count; i++)
    {
        // Columns are identified by their alias in the view, not by the name stored in their header
        if (strcmp((((view->infos)->column_references)[i]).alias, column_name) == 0)
            return ((view->columns)+i);
    }

    obidebug(1, "\nError: column not found");
    return NULL;
}
/*
 * Changes the alias identifying a column in a view.
 *
 * The new alias is refused if it is already a key of the column dictionary.
 * Every column reference whose alias is equal to current_name gets the new
 * alias, then the column dictionary is rebuilt from the column references.
 *
 * @param view A pointer on the view.
 * @param current_name The alias currently identifying the column in the view.
 * @param alias The new alias.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if the view is read-only, if the new alias is already used,
 *            if no column has current_name as its alias, or if the column
 *            dictionary could not be rebuilt.
 */
int obi_view_create_column_alias(Obiview_p view, const char* current_name, const char* alias)
{
    int  i;
    bool found;

    // Check that the view is not read-only
    if (view->read_only)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError trying to change a column alias in a read-only view");
        return -1;
    }

    // Check that the new alias is unique
    if (ht_get(view->column_dict, alias) != NULL)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError: the new name/alias identifying a column in a view is not unique");
        return -1;
    }

    // Set the new alias in the column references
    found = false;
    for (i=0; i<((view->infos)->column_count); i++)
    {
        if (!strcmp((((view->infos)->column_references)[i]).alias, current_name))
        {
            // NOTE(review): strcpy does not bound the copy; confirm that the alias length is checked against the size of the alias field upstream
            strcpy((((view->infos)->column_references)[i]).alias, alias);
            found = true;
        }
    }

    if (!found)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError: can't find the column '%s' to change its alias", current_name);
        return -1;
    }

    // Rebuild the column dictionary so that lookups use the new alias, and report a failure to the caller
    if (update_column_dict(view) < 0)
        return -1;

    return 0;
}
@ -1503,6 +1749,9 @@ int obi_close_view(Obiview_p view)
}
}
// Free the column dictionary
ht_free(view->column_dict);
// Unmap view file
if (obi_view_unmap_file(view->dms, view->infos) < 0)
{