Files
obitools3/src/obidmscolumn.c
Celine Mercier a0da984003 Fixed bug where columns would not get truncated to the right size, and
fixed bug where column directories would be open and not closed in some
instances
2016-09-21 17:28:52 +02:00

1573 lines
44 KiB
C

/****************************************************************************
* OBIDMS columns functions *
****************************************************************************/
/**
* @file obidmscolumn.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 22 May 2015
* @brief Functions shared by all the OBIDMS columns.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <stdbool.h>
#include <math.h>
#include <sys/mman.h>
#include "obidmscolumn.h"
#include "obidmscolumn_idx.h"
#include "obidmscolumndir.h"
#include "obidms.h"
#include "obitypes.h"
#include "obierrno.h"
#include "obidebug.h"
#include "obilittlebigman.h"
#include "obiblob_indexer.h"
#include "utils.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**************************************************************************
*
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
*
**************************************************************************/
/**
* @brief Internal function building the file name for a column.
*
* The function builds the file name corresponding to a column of an OBIDMS.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column_name The name of the OBIDMS column file.
* @param version_number The version number of the OBIDMS column file.
*
* @returns A pointer to the column file name.
* @retval NULL if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static char* build_column_file_name(const char* column_name, obiversion_t version_number);
/**
* @brief Internal function building the file name for a column version file.
*
* The column version file indicates the latest version number for a column.
* This function returns the name of the file storing this information.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column_name The name of the OBIDMS column.
*
* @returns A pointer to the version file name.
* @retval NULL if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static char* build_version_file_name(const char* column_name);
/**
* @brief Internal function returning a new column version number
* in the OBIDMS database.
*
* @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory().
* @param block Whether the call is blocking or not:
* - `true` the call is blocking
* - `false` the call is not blocking.
*
* @returns The next version number for this column.
* @retval -1 if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block);
/**
* @brief Internal function creating a new column version file
* in the OBIDMS database.
*
* The new file is initialized with the minimum version number `0`.
*
* @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory().
*
* @returns The next usable version number for this column : `0`.
* @retval -1 if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory);
/**
* @brief Internal function setting the elements names of the lines of a
* column in the header of the OBIDMS column structure.
*
* @param column A pointer as returned by obi_create_column().
* @param elements_names The names of the elements with ';' as separator.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names);
/**
* @brief Internal function computing how many lines of an OBIDMS column
* fit in a memory page.
*
* @param data_type The data OBIType.
* @param nb_elements_per_line The number of elements per line.
*
* @returns The line count for one memory page.
*
* @since September 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line);
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
*
************************************************************************/
/*
 * Builds the file name of a column file: "<column_name>@<version_number>.odc".
 *
 * The length of the name is computed by snprintf() itself, so it is right for
 * any version number (including negative ones, for which a log10()-based
 * digit count is undefined).
 *
 * The returned string is heap-allocated and has to be freed by the caller.
 * Returns NULL (with the OBI errno set) if the name could not be built.
 */
static char* build_column_file_name(const char* column_name, obiversion_t version_number)
{
    char* file_name;
    int   name_length;

    // First pass: only compute the length of the formatted name
    name_length = snprintf(NULL, 0, "%s@%d.odc", column_name, version_number);
    if (name_length < 0)
    {
        obi_set_errno(OBICOL_MEMORY_ERROR);
        obidebug(1, "\nError building a column file name");
        return NULL;
    }

    // +1 for the terminating '\0'
    file_name = (char*) malloc((((size_t) name_length) + 1) * sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a column file name");
        return NULL;
    }

    // Second pass: actually write the name
    if (snprintf(file_name, ((size_t) name_length) + 1, "%s@%d.odc", column_name, version_number) < 0)
    {
        obi_set_errno(OBICOL_MEMORY_ERROR);
        obidebug(1, "\nError building a column file name");
        free(file_name);    // do not leak the buffer on failure
        return NULL;
    }

    return file_name;
}
/*
 * Builds the name of the version file of a column: "<column_name>.odv".
 *
 * The returned string is heap-allocated and has to be freed by the caller.
 * Returns NULL (with the OBI errno set) if the name could not be built.
 */
static char* build_version_file_name(const char* column_name)
{
    char* file_name;
    int   name_length;

    // First pass: only compute the length of the formatted name
    name_length = snprintf(NULL, 0, "%s.odv", column_name);
    if (name_length < 0)
    {
        obi_set_errno(OBICOL_MEMORY_ERROR);
        obidebug(1, "\nError building a version file name");
        return NULL;
    }

    // +1 for the terminating '\0'
    file_name = (char*) malloc((((size_t) name_length) + 1) * sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a version file name");
        return NULL;
    }

    // Second pass: actually write the name
    if (snprintf(file_name, ((size_t) name_length) + 1, "%s.odv", column_name) < 0)
    {
        obi_set_errno(OBICOL_MEMORY_ERROR);
        obidebug(1, "\nError building a version file name");
        free(file_name);    // do not leak the buffer on failure
        return NULL;
    }

    return file_name;
}
/*
 * Reserves a new version number for a column.
 *
 * The version file of the column is opened relative to the column directory.
 * If it does not exist yet, it is created with create_version_file() and the
 * value returned by that function is returned. Otherwise the first
 * sizeof(obiversion_t) bytes of the file are locked with lockf() (blocking or
 * not depending on `block`), the stored number is read, incremented, written
 * back, and the lock is released.
 *
 * Returns the new version number, or -1 if an error occurred.
 */
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block)
{
    off_t        loc_size;
    obiversion_t new_version_number;
    char*        version_file_name;
    int          version_file_descriptor;
    int          lock_mode;

    new_version_number = 0;
    loc_size = sizeof(obiversion_t);

    // Select the correct lockf operation according to the blocking mode
    lock_mode = (block ? F_LOCK : F_TLOCK);

    // Build the version file name
    version_file_name = build_version_file_name(column_directory->column_name);
    if (version_file_name == NULL)
        return -1;

    // Open the version file
    version_file_descriptor = openat(column_directory->dir_fd, version_file_name, O_RDWR);
    if (version_file_descriptor < 0)
    {
        // errno is tested before anything else can overwrite it
        if (errno == ENOENT)
        {
            // No version file yet: the name is not needed anymore, the creation function rebuilds it
            free(version_file_name);
            return create_version_file(column_directory);
        }
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError opening a version file");
        free(version_file_name);
        return -1;
    }
    free(version_file_name);

    // Test if the version file size is ok
    if (lseek(version_file_descriptor, 0, SEEK_END) < loc_size)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError testing if a version file size is ok");
        goto error_close;
    }

    // Reset offset to 0 (lockf locks from the current offset)
    if (lseek(version_file_descriptor, 0, SEEK_SET) != 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError positioning offset in version file");
        goto error_close;
    }

    // Lock the file
    if (lockf(version_file_descriptor, lock_mode, loc_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError locking a version file");
        goto error_close;
    }

    // Read the current version number
    if (read(version_file_descriptor, &new_version_number, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t)))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError reading a version file");
        goto error_close;
    }

    new_version_number++;

    // Reset offset to 0 to write the new version number
    if (lseek(version_file_descriptor, 0, SEEK_SET) != 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError positioning offset in version file");
        goto error_close;
    }

    // Write the new version number
    if (write(version_file_descriptor, &new_version_number, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t)))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError writing a new version number in a version file");
        goto error_close;
    }

    // Reset offset to 0 so that the unlock below covers the same bytes as the lock
    if (lseek(version_file_descriptor, 0, SEEK_SET) != 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError positioning offset in version file");
        goto error_close;
    }

    // Unlock the file
    if (lockf(version_file_descriptor, F_ULOCK, loc_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError unlocking a version file");
        goto error_close;
    }

    if (close(version_file_descriptor) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a version file");
        return -1;
    }

    return new_version_number;

error_close:
    // Shared failure exit: the error has already been reported by the failing step
    close(version_file_descriptor);
    return -1;
}
/*
 * Creates the version file of a column and stores the version number 0 in it.
 *
 * The file is created exclusively (the call fails if it already exists),
 * locked, sized to sizeof(obiversion_t) bytes, written, unlocked and closed.
 *
 * Returns the version number written (0), or -1 if an error occurred.
 */
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory)
{
    const off_t        record_size   = sizeof(obiversion_t);
    const obiversion_t first_version = 0;
    char*              name;
    int                fd;

    name = build_version_file_name(column_directory->column_name);
    if (name == NULL)
        return -1;

    // Create the version file; O_EXCL makes this fail if it already exists
    fd = openat(column_directory->dir_fd, name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (fd < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError opening a version file");
        free(name);
        return -1;
    }
    free(name);

    // Take the lock on the bytes that will hold the version number
    if (lockf(fd, F_LOCK, record_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError locking a version file");
        goto abort;
    }

    // Give the file its final size
    if (ftruncate(fd, record_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating a version file");
        goto abort;
    }

    // Go to the beginning of the file before writing
    if (lseek(fd, 0, SEEK_SET) != 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError changing offset of a version file");
        goto abort;
    }

    // Store the first version number
    if (write(fd, &first_version, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t)))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError writing version number in a version file");
        goto abort;
    }

    // Go back to the beginning so that the unlock covers the locked bytes
    if (lseek(fd, 0, SEEK_SET) != 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError preparing a version file for unlocking");
        goto abort;
    }

    // Release the lock
    if (lockf(fd, F_ULOCK, record_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError unlocking a version file");
        goto abort;
    }

    if (close(fd) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a version file");
        return -1;
    }

    return first_version;

abort:
    // Shared failure exit: the failing step has already reported the error
    close(fd);
    return -1;
}
/*
 * Copies the elements names (names separated by ';') in the header of a column.
 *
 * Returns 0 on success, or -1 if the column, its header or the names are NULL
 * (copying from or to a NULL pointer would be undefined behavior).
 *
 * NOTE(review): the copy is not bounded; the size of the elements_names field
 * of the header is not visible from here, so the caller presumably has to make
 * sure the names fit -- TODO confirm and bound the copy.
 */
int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names)
{
    if ((column == NULL) || (column->header == NULL) || (elements_names == NULL))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError setting the elements names of a column: NULL argument");
        return -1;
    }

    strcpy((column->header)->elements_names, elements_names);

    return 0;
}
/*
 * Computes how many lines of a column fit in one memory page: the page size
 * divided by the size of one line (element size times elements per line).
 */
index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
{
    const int page_size = getpagesize();

    return page_size / (nb_elements_per_line * obi_sizeof(data_type));
}
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
*
**********************************************************************/
/*
 * Reads the latest version number of a column from its version file.
 *
 * The version file is opened read-only relative to the column directory,
 * checked to be at least sizeof(obiversion_t) bytes long, and the number
 * stored at its beginning is read. No lock is taken.
 *
 * Returns the version number read, or -1 if an error occurred.
 */
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory)
{
    const off_t  expected_size = sizeof(obiversion_t);
    obiversion_t stored_version = 0;
    char*        name;
    int          fd;

    name = build_version_file_name(column_directory->column_name);
    if (name == NULL)
        return -1;

    // Open the version file
    fd = openat(column_directory->dir_fd, name, O_RDONLY);
    if (fd < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError opening a version file");
        free(name);
        return -1;
    }
    free(name);

    // The file has to be big enough to contain a version number
    if (lseek(fd, 0, SEEK_END) < expected_size)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError testing if a version file size is ok");
        goto abort;
    }

    // Go back to the beginning of the file
    if (lseek(fd, 0, SEEK_SET) != 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError setting the offset of a version file to 0");
        goto abort;
    }

    // Read the stored version number
    if (read(fd, &stored_version, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t)))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError reading the latest version number in a version file");
        goto abort;
    }

    if (close(fd) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a version file");
        return -1;
    }

    return stored_version;

abort:
    // Shared failure exit: the failing step has already reported the error
    close(fd);
    return -1;
}
/*
 * Returns the latest version number of the column called `column_name` in
 * the given DMS.
 *
 * The column directory is opened only for the time of the lookup and is
 * closed again before returning, on success as well as on failure.
 *
 * Returns the latest version number, or -1 if an error occurred.
 */
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name)
{
    OBIDMS_column_directory_p column_directory;
    obiversion_t              latest_version;

    // Get the column directory structure associated to the column
    column_directory = obi_open_column_directory(dms, column_name);
    if (column_directory == NULL)
    {
        obidebug(1, "\nProblem opening a column directory structure");
        return -1;
    }

    // Get the latest version number
    latest_version = obi_get_latest_version_number(column_directory);
    if (latest_version < 0)
    {
        obidebug(1, "\nProblem getting the latest version number in a column directory");
        obi_close_column_directory(column_directory);    // already failing, result ignored
        return -1;
    }

    // The directory was only needed to read the version file
    if (obi_close_column_directory(column_directory) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a column directory");
        return -1;
    }

    return latest_version;
}
/*
 * Returns the size of a column header on this platform: the size of the
 * header structure rounded up to a whole number of memory pages.
 */
size_t obi_get_platform_header_size()
{
    // Number of pages needed to hold the header structure, rounded up
    const double page_count = ceil(((double) sizeof(OBIDMS_column_header_t)) / ((double) getpagesize()));

    return (size_t) (page_count * getpagesize());
}
/*
 * Counts the names in a list of names separated by ';'.
 *
 * Empty names (leading, trailing or consecutive separators) are not counted,
 * which is how a strtok()-based count behaves, but the list is neither copied
 * nor modified.
 */
static index_t count_semicolon_separated_names(const char* names)
{
    index_t count   = 0;
    bool    in_name = false;

    for (; *names != '\0'; names++)
    {
        if (*names == ';')
            in_name = false;
        else if (!in_name)
        {
            in_name = true;
            count++;
        }
    }

    return count;
}


/*
 * Creates a new version of a column, maps it in memory and returns it.
 *
 * All the arguments are validated first, so that nothing is created and no
 * version number is consumed for a call that can not succeed. Then:
 *  - the column directory is opened or created and a new version number is reserved,
 *  - the column file is created (exclusively) and sized to the header plus the data,
 *  - the header and the data are mmapped read/write and the header is filled,
 *  - for OBI_STR, OBI_SEQ and OBI_QUAL columns the indexer is opened or created
 *    (the column itself then stores OBI_IDX values),
 *  - the data is initialized to NA values and the column is added to the list
 *    of opened columns of the DMS with a counter of 1.
 *
 * @param dms                        The DMS in which the column is created.
 * @param column_name                The name of the column.
 * @param data_type                  The type of the data as seen by the user.
 * @param nb_lines                   The wanted line count; raised to the line count of
 *                                   one memory page if smaller, rejected if greater
 *                                   than MAXIMUM_LINE_COUNT.
 * @param nb_elements_per_line       The number of elements per line (at least 1).
 * @param elements_names             The names of the elements separated by ';'. With one
 *                                   element per line it has to be equal to the column
 *                                   name; NULL is then accepted and stands for the
 *                                   column name.
 * @param indexer_name               The name of the indexer for indexed types; NULL or ""
 *                                   to let the name be built from the column name and version.
 * @param associated_column_name     For OBI_QUAL columns, the name of the associated column.
 * @param associated_column_version  For OBI_QUAL columns, the version of the associated column.
 * @param comments                   Optional comments (can be NULL).
 *
 * @returns The new column, or NULL if an error occurred. On error, everything
 *          acquired by this function (mappings, file descriptor, column
 *          structure, column directory) is released; the column file, if
 *          already created, is left on disk.
 */
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
                                  const char* column_name,
                                  OBIType_t data_type,
                                  index_t nb_lines,
                                  index_t nb_elements_per_line,
                                  const char* elements_names,
                                  const char* indexer_name,
                                  const char* associated_column_name,
                                  obiversion_t associated_column_version,
                                  const char* comments
                                 )
{
    OBIDMS_column_p           new_column;
    OBIDMS_column_directory_p column_directory;
    OBIDMS_column_header_p    header;
    size_t                    file_size;
    obiversion_t              version_number;
    char*                     column_file_name;
    int                       column_file_descriptor;
    size_t                    header_size;
    size_t                    data_size;
    index_t                   minimum_line_count;
    OBIType_t                 returned_data_type;
    OBIType_t                 stored_data_type;
    char*                     final_indexer_name;
    bool                      indexed;
    bool                      header_mapped;
    bool                      data_mapped;
    bool                      indexer_opened;

    new_column             = NULL;
    column_directory       = NULL;
    column_file_descriptor = -1;
    header_size            = 0;
    data_size              = 0;
    header_mapped          = false;
    data_mapped            = false;
    indexer_opened         = false;

    // ---- Validation of the arguments (nothing is acquired yet, plain returns) ----

    if (dms == NULL)
    {
        obidebug(1, "\nCan't create column because of invalid DMS");
        return NULL;
    }
    if (column_name == NULL)
    {
        obidebug(1, "\nCan't create column because of empty column name");
        return NULL;
    }
    if ((data_type < 1) || (data_type > 8))    // TODO check in more robust way
    {
        obidebug(1, "\nCan't create column because of invalid data type");
        return NULL;
    }
    // A line needs at least one element (the line size is used as a divisor below)
    if (nb_elements_per_line < 1)
    {
        obidebug(1, "\nCan't create column because of invalid number of elements per line");
        return NULL;
    }
    if (nb_lines > MAXIMUM_LINE_COUNT)
    {
        obidebug(1, "\nCan't create column because of line count greater than the maximum allowed (%d)", MAXIMUM_LINE_COUNT);
        return NULL;
    }

    // The number of elements names should be equal to the number of elements per line
    if (nb_elements_per_line > 1)
    {
        if (elements_names == NULL)
        {
            obidebug(1, "\nCan't create column because no elements names were given for a number of elements per line greater than 1");
            return NULL;
        }
        if (count_semicolon_separated_names(elements_names) != nb_elements_per_line)
        {
            obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
            return NULL;
        }
    }
    else if (elements_names == NULL)
    {
        // With one element per line the only valid name is the column name
        elements_names = column_name;
    }
    else if (strcmp(elements_names, column_name) != 0)
    {
        obidebug(1, "\nCan't create column because the element name does not match the column name");
        return NULL;
    }

    // An OBI_QUAL column needs a reference to its associated column // TODO discuss cases
    if (data_type == OBI_QUAL)
    {
        if (associated_column_name == NULL)
        {
            obidebug(1, "\nError: The name of the associated column when creating a new column is NULL");
            return NULL;
        }
        if (associated_column_version == -1)
        {
            obidebug(1, "\nError: The version of the associated column when creating a new column is not defined");
            return NULL;
        }
    }

    // Indexed types store indices referring to data stored elsewhere (in the indexer)
    indexed = ((data_type == OBI_STR) || (data_type == OBI_SEQ) || (data_type == OBI_QUAL));
    returned_data_type = data_type;
    stored_data_type   = (indexed ? OBI_IDX : returned_data_type);

    // The initial line count should be at least the line count of one memory page
    minimum_line_count = get_line_count_per_page(stored_data_type, nb_elements_per_line);
    if (nb_lines < minimum_line_count)
        nb_lines = minimum_line_count;

    // ---- Acquisition of the resources (from here on, errors go through the cleanup) ----

    // Get the column directory structure associated to the column
    column_directory = obi_column_directory(dms, column_name);
    if (column_directory == NULL)
    {
        obi_set_errno(OBICOLDIR_UNKNOWN_ERROR);
        obidebug(1, "\nError opening a column directory structure");
        return NULL;
    }

    // Reserve a new version number
    version_number = obi_get_new_version_number(column_directory, true);
    if (version_number < 0)
        goto error;

    // Calculate the size needed
    header_size = obi_get_platform_header_size();
    data_size   = obi_array_sizeof(stored_data_type, nb_lines, nb_elements_per_line);
    file_size   = header_size + data_size;

    // Get the column file name
    column_file_name = build_column_file_name(column_name, version_number);
    if (column_file_name == NULL)
        goto error;

    // Create the column file; O_EXCL makes this fail if it already exists
    column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (column_file_descriptor < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError opening a column file %s", column_file_name);
        free(column_file_name);
        goto error;
    }
    free(column_file_name);

    // Truncate the column file to the right size
    if (ftruncate(column_file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating a column file to the right size");
        goto error;
    }

    // Allocate the memory for the column structure
    new_column = (OBIDMS_column_p) malloc(sizeof(OBIDMS_column_t));
    if (new_column == NULL)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError allocating the memory for the column structure");
        goto error;
    }

    // Fill the column structure
    new_column->dms              = dms;
    new_column->column_directory = column_directory;
    new_column->indexer          = NULL;    // only set for indexed types

    new_column->header = mmap(NULL,
                              header_size,
                              PROT_READ | PROT_WRITE,
                              MAP_SHARED,
                              column_file_descriptor,
                              0
                             );
    if (new_column->header == MAP_FAILED)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError mmapping the header of a column");
        goto error;
    }
    header_mapped = true;

    new_column->data = mmap(NULL,
                            data_size,
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED,
                            column_file_descriptor,
                            header_size
                           );
    if (new_column->data == MAP_FAILED)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError mmapping the data of a column");
        goto error;
    }
    data_mapped = true;

    new_column->writable = true;

    // Fill the header
    header = new_column->header;
    header->header_size          = header_size;
    header->data_size            = data_size;
    header->line_count           = nb_lines;
    header->lines_used           = 0;
    header->nb_elements_per_line = nb_elements_per_line;
    header->stored_data_type     = stored_data_type;
    header->returned_data_type   = returned_data_type;
    header->creation_date        = time(NULL);
    header->version              = version_number;
    header->cloned_from          = -1;

    obi_column_set_elements_names(new_column, elements_names);

    strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME);

    if (comments != NULL)
        strncpy(header->comments, comments, COMMENTS_MAX_LENGTH);

    // Store the associated column reference if needed (already validated above)
    if (data_type == OBI_QUAL)
    {
        strcpy((header->associated_column).column_name, associated_column_name);
        (header->associated_column).version = associated_column_version;
    }

    // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is opened or created
    if (indexed)
    {
        // Build the indexer name if none was given
        if ((indexer_name == NULL) || (strcmp(indexer_name, "") == 0))
        {
            final_indexer_name = obi_build_indexer_name(column_name, version_number);
            if (final_indexer_name == NULL)
                goto error;
        }
        else
        {
            final_indexer_name = (char*) malloc((strlen(indexer_name)+1)*sizeof(char));
            if (final_indexer_name == NULL)
            {
                obi_set_errno(OBI_MALLOC_ERROR);
                obidebug(1, "\nError allocating the memory for an indexer name");
                goto error;
            }
            strcpy(final_indexer_name, indexer_name);
        }

        // NOTE(review): final_indexer_name is not freed once handed to obi_indexer(),
        // because whether the indexer keeps the pointer can not be told from here
        // -- TODO confirm the ownership and free it if the indexer copies the name.
        new_column->indexer = obi_indexer(dms, final_indexer_name);
        if (new_column->indexer == NULL)
        {
            obidebug(1, "\nError opening or creating the indexer associated with a column");
            goto error;
        }
        indexer_opened = true;

        strncpy(header->indexer_name, final_indexer_name, INDEXER_MAX_NAME);
    }

    // Fill the data with NA values
    obi_ini_to_NA_values(new_column, 0, nb_lines);

    // The mappings stay valid after the file descriptor is closed
    if (close(column_file_descriptor) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a column file");
        column_file_descriptor = -1;    // not to be closed again by the cleanup
        goto error;
    }

    // Add in the list of opened columns
    obi_dms_list_column(dms, new_column);

    // Set counter to 1 // TODO Discuss counters
    new_column->counter = 1;

    return new_column;

error:
    // Shared failure exit: release everything acquired so far, in reverse order.
    // The error has already been reported by the failing step.
    if (indexer_opened)
        obi_close_indexer(new_column->indexer);
    if (data_mapped)
        munmap(new_column->data, data_size);
    if (header_mapped)
        munmap(new_column->header, header_size);
    free(new_column);
    if (column_file_descriptor >= 0)
        close(column_file_descriptor);
    if (column_directory != NULL)
        obi_close_column_directory(column_directory);

    return NULL;
}
/*
 * Opens a version of a column, read-only, and returns it.
 *
 * If the column is already in the list of opened columns of the DMS, its
 * counter is incremented and the already opened structure is returned.
 * Otherwise the column file is opened read-only, its header and data are
 * mmapped read-only, the indexer is opened for OBI_STR, OBI_SEQ and OBI_QUAL
 * columns, and the column is added to the list of opened columns with a
 * counter of 1.
 *
 * @param dms             The DMS containing the column.
 * @param column_name     The name of the column.
 * @param version_number  The version to open, or -1 for the latest version.
 *
 * @returns The opened column, or NULL if an error occurred. On error,
 *          everything acquired by this function (mappings, file descriptor,
 *          column structure, column directory) is released.
 */
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
                                const char* column_name,
                                obiversion_t version_number)
{
    OBIDMS_column_p           column;
    OBIDMS_column_directory_p column_directory;
    OBIType_t                 returned_data_type;
    char*                     column_file_name;
    int                       column_file_descriptor;
    size_t                    header_size;
    size_t                    data_size;
    bool                      header_mapped;
    bool                      data_mapped;
    bool                      indexer_opened;

    column                 = NULL;
    column_file_descriptor = -1;
    header_size            = 0;
    data_size              = 0;
    header_mapped          = false;
    data_mapped            = false;
    indexer_opened         = false;

    // Get the column directory structure associated to the column
    column_directory = obi_open_column_directory(dms, column_name);
    if (column_directory == NULL)
    {
        obidebug(1, "\nError opening a column directory structure");
        return NULL;
    }

    // Get the latest version number if it has the value -1 (not given by user)
    if (version_number == -1)
    {
        version_number = obi_get_latest_version_number(column_directory);
        if (version_number < 0)
        {
            obidebug(1, "\nError getting the latest version number in a column directory");
            goto error;
        }
    }

    // Check if the column is already in the list of opened columns
    column = obi_dms_get_column_from_list(dms, column_name, version_number);
    // If it's found, increment its counter and return it
    if (column != NULL)
    {
        (column->counter)++;
        // The already opened column has its own column directory, this one is not needed
        if (obi_close_column_directory(column_directory) < 0)
        {
            obi_set_errno(OBICOL_UNKNOWN_ERROR);
            obidebug(1, "\nError closing a column directory");
            return NULL;
        }
        return column;
    }

    // Get the column file name
    column_file_name = build_column_file_name(column_name, version_number);
    if (column_file_name == NULL)
        goto error;

    // Open the column file, ALWAYS READ-ONLY
    column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY);
    if (column_file_descriptor < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError opening column file");
        free(column_file_name);
        goto error;
    }
    free(column_file_name);

    // Allocate the memory for the column structure
    column = (OBIDMS_column_p) malloc(sizeof(OBIDMS_column_t));
    if (column == NULL)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError allocating the memory for a column structure");
        goto error;
    }

    // Read the header size, stored at the very beginning of the file
    if (read(column_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t)))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError reading the header size to open a column");
        goto error;
    }

    // Fill the column structure
    column->dms              = dms;
    column->column_directory = column_directory;
    column->indexer          = NULL;    // only set for indexed types

    column->header = mmap(NULL,
                          header_size,
                          PROT_READ,
                          MAP_SHARED,
                          column_file_descriptor,
                          0
                         );
    if (column->header == MAP_FAILED)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError mmapping the header of a column");
        goto error;
    }
    header_mapped = true;

    // Kept in a local so that the data can still be unmapped after the header is
    data_size = (column->header)->data_size;

    // Map the data
    column->data = mmap(NULL,
                        data_size,
                        PROT_READ,
                        MAP_SHARED,
                        column_file_descriptor,
                        header_size
                       );
    if (column->data == MAP_FAILED)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError mmapping the data of a column");
        goto error;
    }
    data_mapped = true;

    column->writable = false;

    // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is opened
    returned_data_type = (column->header)->returned_data_type;
    if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ) || (returned_data_type == OBI_QUAL))
    {
        column->indexer = obi_open_indexer(dms, (column->header)->indexer_name);
        if (column->indexer == NULL)
        {
            obidebug(1, "\nError opening the indexer associated with a column");
            goto error;
        }
        indexer_opened = true;
    }

    // The mappings stay valid after the file descriptor is closed
    if (close(column_file_descriptor) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a column file");
        column_file_descriptor = -1;    // not to be closed again by the cleanup
        goto error;
    }

    // Add in the list of opened columns
    obi_dms_list_column(dms, column);

    // Set counter to 1
    column->counter = 1;

    return column;

error:
    // Shared failure exit: release everything acquired so far, in reverse order.
    // The error has already been reported by the failing step.
    if (indexer_opened)
        obi_close_indexer(column->indexer);
    if (data_mapped)
        munmap(column->data, data_size);
    if (header_mapped)
        munmap(column->header, header_size);
    free(column);
    if (column_file_descriptor >= 0)
        close(column_file_descriptor);
    obi_close_column_directory(column_directory);

    return NULL;
}
/*
 * Creates a new version of a column from an existing version.
 *
 * The version to clone is opened, a new column is created with the same
 * type, elements, indexer, associated column and comments, and:
 *  - if `clone_data` is true and `line_selection` is NULL, all the data is copied;
 *  - if `clone_data` is true and `line_selection` is not NULL, line i of the
 *    new column is a copy of the line of the cloned column whose index is the
 *    i-th value of `line_selection`. Values that are not a valid line index
 *    of the cloned column are skipped, the corresponding lines keep the
 *    values the new column was created with;
 *  - if `clone_data` is false, no data is copied.
 * The cloned column is closed before returning.
 *
 * @param dms             The DMS containing the column.
 * @param line_selection  The column containing the indexes of the lines to copy, or NULL.
 * @param column_name     The name of the column to clone.
 * @param version_number  The version to clone, or -1 for the latest version.
 * @param clone_data      Whether the data has to be copied.
 *
 * @returns The new column, or NULL if an error occurred. On error, the
 *          columns opened by this function are closed.
 */
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
                                 OBIDMS_column_p line_selection,
                                 const char* column_name,
                                 obiversion_t version_number,
                                 bool clone_data)
{
    OBIDMS_column_p column_to_clone;
    OBIDMS_column_p new_column;
    index_t         nb_lines = 0;
    index_t         nb_elements_per_line;
    OBIType_t       data_type;
    size_t          line_size;
    index_t         i, index;

    column_to_clone = obi_open_column(dms, column_name, version_number);
    if (column_to_clone == NULL)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError opening the column to clone");
        return NULL;
    }

    data_type = (column_to_clone->header)->returned_data_type;
    nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line;

    if (clone_data && (line_selection == NULL))
        nb_lines = (column_to_clone->header)->line_count;
    else if (clone_data && (line_selection != NULL))
        nb_lines = (line_selection->header)->line_count;

    new_column = obi_create_column(dms,
                                   column_name,
                                   data_type,
                                   nb_lines,
                                   nb_elements_per_line,
                                   (column_to_clone->header)->elements_names,
                                   (column_to_clone->header)->indexer_name,
                                   ((column_to_clone->header)->associated_column).column_name,
                                   ((column_to_clone->header)->associated_column).version,
                                   (column_to_clone->header)->comments
                                  );
    if (new_column == NULL)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError creating the new column when cloning a column");
        // The source column was opened here, so it is closed here (result ignored, already failing)
        obi_close_column(column_to_clone);
        return NULL;
    }

    (new_column->header)->cloned_from = (column_to_clone->header)->version;

    if (clone_data && (line_selection == NULL))
    {
        if ((new_column->header)->data_size != (column_to_clone->header)->data_size)
        {
            obi_set_errno(OBICOL_UNKNOWN_ERROR);
            obidebug(1, "\nError cloning a column: the sizes of the data source and destination are not equal: source %zu bytes, destination %zu bytes.",
                     (column_to_clone->header)->data_size, (new_column->header)->data_size);
            // Both columns were opened here, so both are closed here (results ignored, already failing)
            obi_close_column(new_column);
            obi_close_column(column_to_clone);
            return NULL;
        }

        // Copy all the data to the new column
        memcpy(new_column->data, column_to_clone->data, (column_to_clone->header)->data_size);

        (new_column->header)->lines_used = (column_to_clone->header)->lines_used;
    }
    else if (clone_data && (line_selection != NULL))
    {
        line_size = obi_sizeof((new_column->header)->stored_data_type) * (new_column->header)->nb_elements_per_line;

        // Copy each line at the right index to the new column
        for (i=0; i<nb_lines; i++)
        {
            // Get the index in the line selection column
            index = obi_column_get_index(line_selection, i);

            // An index outside of the cloned column can not be copied: reading it would
            // access memory outside of the mapped data (this is expected to be the case
            // of the unused lines of the line selection)
            if ((index < 0) || (index >= (column_to_clone->header)->line_count))
                continue;

            // Copy the line at the index in the column to clone to the new column
            // (byte offsets, hence the char* casts)
            memcpy(((char*) (new_column->data)) + (i*line_size),
                   ((char*) (column_to_clone->data)) + (index*line_size),
                   line_size);
        }

        (new_column->header)->lines_used = (line_selection->header)->lines_used;
    }

    // Close column_to_clone
    if (obi_close_column(column_to_clone) < 0)
    {
        obidebug(1, "\nError closing a column that has been cloned");
        // TODO return NULL or not?
    }

    return new_column;
}
/*
 * Closes a column.
 *
 * A writable column is first truncated to the lines it uses. The counter of
 * the column is then decremented, and if it reaches 0 the column is removed
 * from the list of opened columns, its indexer (for OBI_STR, OBI_SEQ and
 * OBI_QUAL columns) and its column directory are closed, its data and header
 * are unmapped and the structure is freed.
 *
 * Every step is attempted even if a previous one failed.
 *
 * @returns 0 if everything went well, -1 if at least one step failed.
 */
int obi_close_column(OBIDMS_column_p column)
{
    int       status = 0;
    OBIType_t returned_type;
    bool      has_indexer;

    // A column that can be written is given back its minimal size
    if (column->writable)
        status = obi_truncate_column(column);

    (column->counter)--;

    // Still in use elsewhere: nothing is released
    if (column->counter != 0)
        return status;

    // Remove from the list of opened columns
    if (obi_dms_unlist_column(column->dms, column) < 0)
        status = -1;

    // Columns of type OBI_STR, OBI_SEQ or OBI_QUAL have an indexer to close
    returned_type = (column->header)->returned_data_type;
    has_indexer   = ((returned_type == OBI_STR) || (returned_type == OBI_SEQ) || (returned_type == OBI_QUAL));
    if (has_indexer && (obi_close_indexer(column->indexer) < 0))
        status = -1;

    // Unmap the data first: its size is read in the header
    if (munmap(column->data, (column->header)->data_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError munmapping column data");
        status = -1;
    }

    // Then unmap the header
    if (munmap(column->header, (column->header)->header_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError munmapping a column header");
        status = -1;
    }

    // Release the column directory
    if (obi_close_column_directory(column->column_directory) < 0)
        status = -1;

    free(column);

    return status;
}
/*
 * Truncates a column file to the lines used, rounded up to a whole number of
 * memory pages (at least one line is kept).
 *
 * The data is unmapped, the file is truncated, and the data is mapped again
 * with its new size; the line count and the data size stored in the header
 * are updated. Nothing is done if the data already has that size.
 *
 * The sizes are computed with size_t arithmetic, so that columns bigger than
 * what fits in an int are handled.
 *
 * @returns 0 on success (or if there was nothing to do), -1 if an error occurred.
 */
int obi_truncate_column(OBIDMS_column_p column)    // TODO is it necessary to unmap/remap?
{
    size_t  file_size;
    size_t  data_size;
    size_t  old_data_size;
    size_t  line_size;
    size_t  page_size;
    size_t  used_size;
    size_t  page_count;
    index_t lines_kept;
    index_t new_line_count;
    int     column_file_descriptor;
    char*   column_file_name;

    // Size of one line; it is used as a divisor below
    line_size = ((size_t) (column->header)->nb_elements_per_line) * obi_sizeof((column->header)->stored_data_type);
    if (line_size == 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating a column: the size of a line is 0");
        return -1;
    }

    // Compute the new line count = the number of lines that fit in the pages needed
    // to store the lines used (at least one line)
    lines_kept     = ONE_IF_ZERO((column->header)->lines_used);
    page_size      = (size_t) getpagesize();
    used_size      = ((size_t) lines_kept) * line_size;
    page_count     = (used_size + page_size - 1) / page_size;    // rounded up
    new_line_count = (index_t) ((page_count * page_size) / line_size);

    data_size     = obi_array_sizeof((column->header)->stored_data_type, new_line_count, (column->header)->nb_elements_per_line);
    old_data_size = (column->header)->data_size;

    // Check that it is actually smaller than the current data size, otherwise no need to truncate
    if (old_data_size == data_size)
        return 0;
    else if (old_data_size < data_size)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating a column: The current data size seems smaller than needed.");
        return -1;
    }

    // Get the column file name
    column_file_name = build_column_file_name((column->header)->name, (column->header)->version);
    if (column_file_name == NULL)
        return -1;

    // Open the column file
    column_file_descriptor = openat((column->column_directory)->dir_fd, column_file_name, O_RDWR);
    if (column_file_descriptor < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError getting the file descriptor of a column file");
        free(column_file_name);
        return -1;
    }
    free(column_file_name);

    // Unmap the data before truncating the file
    if (munmap(column->data, old_data_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError munmapping the data of a column before truncating");
        close(column_file_descriptor);
        return -1;
    }

    // Truncate the column file
    file_size = (column->header)->header_size + data_size;
    if (ftruncate(column_file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating a column file at the number of lines used");
        // The file still has its previous size: map the data again as it was, so
        // that column->data does not keep pointing to the unmapped region
        // (it is MAP_FAILED if this mapping fails too)
        column->data = mmap(NULL,
                            old_data_size,
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED,
                            column_file_descriptor,
                            (column->header)->header_size
                           );
        close(column_file_descriptor);
        return -1;
    }

    // Remap the data
    column->data = mmap(NULL,
                        data_size,
                        PROT_READ | PROT_WRITE,
                        MAP_SHARED,
                        column_file_descriptor,
                        (column->header)->header_size
                       );
    if (column->data == MAP_FAILED)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError re-mmapping the data of a column after truncating");
        close(column_file_descriptor);
        return -1;
    }

    // Set new line_count and new data size
    (column->header)->line_count = new_line_count;
    (column->header)->data_size  = data_size;

    if (close(column_file_descriptor) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError closing a column file");
        return -1;
    }

    return 0;
}
/*
 * Enlarges the file of a writable column and remaps its data part.
 *
 * The line count is multiplied by COLUMN_GROWTH_FACTOR, the file is extended
 * accordingly, the data part is unmapped and mapped again with its new size,
 * the header is updated and the newly added lines are set to NA.
 *
 * Returns 0 on success, -1 if an error occurred (read-only column, line count
 * above MAXIMUM_LINE_COUNT, or failure of a file or mapping operation).
 */
int obi_enlarge_column(OBIDMS_column_p column)
{
	size_t  grown_file_size;
	size_t  previous_data_size;
	size_t  grown_data_size;
	size_t  data_offset;
	index_t previous_line_count;
	index_t grown_line_count;
	int     fd;
	char*   file_name;

	// A read-only column can not be enlarged
	if (!(column->writable))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError trying to enlarge a read-only column");
		return -1;
	}

	// Build the name of the column file
	file_name = build_column_file_name((column->header)->name, (column->header)->version);
	if (file_name == NULL)
		return -1;

	// Open the column file relatively to its column directory
	fd = openat((column->column_directory)->dir_fd, file_name, O_RDWR);
	if (fd < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError getting the file descriptor of a column file");
		free(file_name);
		return -1;
	}
	free(file_name);

	// Work out the new line count and make sure it stays within bounds
	previous_line_count = (column->header)->line_count;
	grown_line_count    = previous_line_count * COLUMN_GROWTH_FACTOR;
	if (grown_line_count > MAXIMUM_LINE_COUNT)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError enlarging a column file: new line count greater than the maximum allowed");
		goto fail_with_open_file;
	}

	// Derive the new sizes: the data part follows the header in the file
	previous_data_size = (column->header)->data_size;
	grown_data_size    = obi_array_sizeof((column->header)->stored_data_type, grown_line_count, (column->header)->nb_elements_per_line);
	data_offset        = (column->header)->header_size;
	grown_file_size    = data_offset + grown_data_size;

	// Enlarge the file 	// TODO isn't it possible that this makes the file "move"?
	if (ftruncate(fd, grown_file_size) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError enlarging a column file");
		goto fail_with_open_file;
	}

	// Drop the old mapping of the data...
	if (munmap(column->data, previous_data_size) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError munmapping the data of a column before enlarging");
		goto fail_with_open_file;
	}

	// ...and map the data again with its new size
	column->data = mmap(NULL, grown_data_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, data_offset);
	if (column->data == MAP_FAILED)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError re-mmapping the data of a column after enlarging the file");
		goto fail_with_open_file;
	}

	// Record the new dimensions in the header
	(column->header)->line_count = grown_line_count;
	(column->header)->data_size  = grown_data_size;

	// Set the lines that were just added to NA
	obi_ini_to_NA_values(column, previous_line_count, grown_line_count - previous_line_count);

	if (close(fd) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError closing a column file");
		return -1;
	}

	return 0;

fail_with_open_file:
	// Common exit for errors happening while the column file is open
	close(fd);
	return -1;
}
/*
 * Sets a range of lines of a column to the NA value of its stored data type.
 *
 * column:        the column whose data is written.
 * first_line_nb: the first line to set to NA.
 * nb_lines:      the number of lines to set to NA.
 *
 * Every element of each line is written. Nothing is written for OBI_VOID.
 */
void obi_ini_to_NA_values(OBIDMS_column_p column,
                          index_t first_line_nb,
                          index_t nb_lines)
{
	index_t pos, first_element, past_last_element, element_count;

	// Convert the range of lines into a range of elements
	element_count     = nb_lines*((column->header)->nb_elements_per_line);
	first_element     = first_line_nb*((column->header)->nb_elements_per_line);
	past_last_element = first_element + element_count;

	switch ((column->header)->stored_data_type)
	{
		case OBI_VOID:	// TODO;
			break;

		case OBI_INT:
		{
			obiint_t* cells = (obiint_t*) (column->data);
			for (pos = first_element; pos < past_last_element; pos++)
				cells[pos] = OBIInt_NA;
			break;
		}

		case OBI_FLOAT:
		{
			obifloat_t* cells = (obifloat_t*) (column->data);
			for (pos = first_element; pos < past_last_element; pos++)
				cells[pos] = OBIFloat_NA;
			break;
		}

		case OBI_BOOL:
		{
			obibool_t* cells = (obibool_t*) (column->data);
			for (pos = first_element; pos < past_last_element; pos++)
				cells[pos] = OBIBool_NA;
			break;
		}

		case OBI_CHAR:
		{
			obichar_t* cells = (obichar_t*) (column->data);
			for (pos = first_element; pos < past_last_element; pos++)
				cells[pos] = OBIChar_NA;
			break;
		}

		// All the remaining types are written as indices.
		// The original comments mark OBI_QUAL as being only a returned data type (case not used);
		// presumably the same holds for OBI_STR and OBI_SEQ - TODO confirm.
		case OBI_IDX:
		case OBI_QUAL:
		case OBI_STR:
		case OBI_SEQ:
		{
			index_t* cells = (index_t*) (column->data);
			for (pos = first_element; pos < past_last_element; pos++)
				cells[pos] = OBIIdx_NA;
			break;
		}
	}
}
/*
 * Maps (read-only) the header of a column identified by its name and version.
 *
 * dms:            the OBIDMS containing the column.
 * column_name:    the name of the column.
 * version_number: the version of the column; when negative, the latest
 *                 version found in the column directory is used.
 *
 * Returns a pointer to the mapped header, to be released with
 * obi_close_header(), or NULL if an error occurred.
 *
 * The column directory and the column file opened here are only needed to
 * map the header, so both are closed before returning, on every path.
 */
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number)
{
	OBIDMS_column_header_p    header = NULL;
	OBIDMS_column_directory_p column_directory;
	char*                     column_file_name;
	int                       column_file_descriptor = -1;
	size_t                    header_size = 0;

	// Get the column directory structure associated to the column
	column_directory = obi_open_column_directory(dms, column_name);
	if (column_directory == NULL)
	{
		obidebug(1, "\nError opening a column directory structure");
		return NULL;
	}

	// Get the latest version number if not provided
	if (version_number < 0)
	{
		version_number = obi_get_latest_version_number(column_directory);
		if (version_number < 0)
		{
			obidebug(1, "\nError getting the latest version number in a column directory");
			goto cleanup;
		}
	}

	// Get the column file name
	column_file_name = build_column_file_name(column_name, version_number);
	if (column_file_name == NULL)
		goto cleanup;

	// Open the column file (READ-ONLY)
	column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY);
	if (column_file_descriptor < 0)
	{
		obidebug(1, "\nError opening a column file");
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		free(column_file_name);
		goto cleanup;
	}
	free(column_file_name);

	// Read the header size, stored at the very beginning of the file
	if (read(column_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t)))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError reading the header size to read a header");
		goto cleanup;
	}

	// Fill the header structure
	header = mmap(NULL,
	              header_size,
	              PROT_READ,
	              MAP_SHARED,
	              column_file_descriptor,
	              0
	             );
	if (header == MAP_FAILED)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError mmapping the header of a column");
		header = NULL;
		goto cleanup;
	}

cleanup:
	// Close the column file if it was opened.
	// A failure to close is only reported when everything else went well, since an error was already reported otherwise.
	if (column_file_descriptor >= 0)
	{
		if ((close(column_file_descriptor) < 0) && (header != NULL))
		{
			obi_set_errno(OBICOL_UNKNOWN_ERROR);
			obidebug(1, "\nError closing a column file");
			// Do not leak the mapping when reporting the failure
			munmap(header, header_size);
			header = NULL;
		}
	}

	// Close the column directory opened at the beginning of the function
	if (obi_close_column_directory(column_directory) < 0)
	{
		obidebug(1, "\nError closing a column directory");
		if (header != NULL)
		{
			munmap(header, header_size);
			header = NULL;
		}
	}

	return header;
}
/*
 * Unmaps a column header, using the header size stored in the header itself.
 *
 * Returns 0 on success, -1 if the header could not be unmapped.
 */
int obi_close_header(OBIDMS_column_header_p header)
{
	int unmap_status;

	unmap_status = munmap(header, header->header_size);
	if (unmap_status >= 0)
		return 0;

	// The unmapping failed
	obi_set_errno(OBICOL_UNKNOWN_ERROR);
	obidebug(1, "\nError munmapping a column header");
	return -1;
}
/*
 * Returns the index of an element in the lines of a column from its name.
 *
 * The elements names of the column are stored in the header as one string
 * where names are separated by ';'. The index of an element is the rank
 * (starting at 0) of its name in that string. As with strtok(), empty names
 * (consecutive, leading or trailing separators) are skipped and not counted.
 *
 * The string is scanned in place: no copy is made and no static state is
 * used, and a name that is not found is reported instead of being compared
 * with a NULL token.
 *
 * Returns the index of the element, or OBIIdx_NA if no element has that name.
 */
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
{
	const char* cursor;
	size_t      wanted_length;
	size_t      name_length;
	index_t     element_index;

	cursor        = (column->header)->elements_names;
	wanted_length = strlen(element_name);
	element_index = 0;

	for (;;)
	{
		// Skip the separator(s) preceding the next name
		cursor += strspn(cursor, ";");
		if (*cursor == '\0')
			break;	// No name left

		// The current name runs until the next separator or the end of the string
		name_length = strcspn(cursor, ";");
		if ((name_length == wanted_length) && (strncmp(cursor, element_name, name_length) == 0))
			return element_index;

		cursor += name_length;
		element_index++;
	}

	obidebug(1, "\nCan't find an element name");
	return OBIIdx_NA;
}
/*
 * Prepares a column for a value to be set at a given line.
 *
 * The column has to be writable and the line number has to be lower than
 * MAXIMUM_LINE_COUNT. The column is enlarged as many times as needed for the
 * line to exist, and the number of lines used stored in the header is raised
 * if the line is beyond it.
 *
 * Returns 0 on success, -1 if an error occurred.
 */
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
{
	index_t lines_needed;

	// Values can not be set in a read-only column
	if (!(column->writable))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError trying to set a value in a read-only column");
		return -1;
	}

	// The line number has to be below the maximum allowed
	if (line_nb >= MAXIMUM_LINE_COUNT)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
		return -1;
	}

	// Number of lines the column must hold for the line to exist
	lines_needed = line_nb + 1;

	// Enlarge the file until it is big enough
	for (;;)
	{
		if (lines_needed <= (column->header)->line_count)
			break;
		if (obi_enlarge_column(column) < 0)
			return -1;
	}

	// Update lines used
	if (lines_needed > (column->header)->lines_used)
		(column->header)->lines_used = lines_needed;

	return 0;
}
/*
 * Checks that a value can be read at a given line of a column, i.e. that the
 * line is within the current line count of the column.
 *
 * Returns 0 if the line can be read, -1 otherwise.
 *
 * TODO problem with some columns in a view being empty or shorter and
 * triggering an error because they've been truncated when the view was
 * closed. Fixed with obiview.c in update_lines() for now
 */
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
{
	// The line exists: nothing to report
	if ((line_nb+1) <= ((column->header)->line_count))
		return 0;

	obi_set_errno(OBICOL_UNKNOWN_ERROR);
	obidebug(1, "\nError trying to get a value that is beyond the current number of lines of the column");
	return -1;
}