Files
obitools3/src/obidmscolumn.c

1486 lines
41 KiB
C
Raw Normal View History

2015-05-22 17:54:34 +02:00
/****************************************************************************
* OBIDMS columns functions *
2015-05-22 17:54:34 +02:00
****************************************************************************/
/**
* @file obidmscolumn.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
2015-05-22 17:54:34 +02:00
* @date 22 May 2015
* @brief Functions shared by all the OBIDMS columns.
2015-05-22 17:54:34 +02:00
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
#include <time.h>
2015-05-26 10:38:56 +02:00
#include <fcntl.h>
#include <stdbool.h>
#include <math.h>
#include <sys/mman.h>
2015-05-22 17:54:34 +02:00
#include "obidmscolumn.h"
#include "obidmscolumndir.h"
#include "obidms.h"
#include "obitypes.h"
#include "obierrno.h"
#include "obidebug.h"
#include "obilittlebigman.h"
2016-04-12 14:53:33 +02:00
#include "obiblob_indexer.h"
#include "utils.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
2015-05-22 17:54:34 +02:00
/**************************************************************************
2015-05-26 10:38:56 +02:00
*
2015-06-10 15:19:02 +02:00
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
2015-05-26 10:38:56 +02:00
*
**************************************************************************/
2015-05-26 10:38:56 +02:00
/**
* @brief Internal function building the file name for a column.
*
2015-06-10 15:19:02 +02:00
* The function builds the file name corresponding to a column of an OBIDMS.
2015-05-26 10:38:56 +02:00
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column_name The name of the OBIDMS column file.
* @param version_number The version number of the OBIDMS column file.
2015-05-26 10:38:56 +02:00
*
* @returns A pointer to the column file name.
* @retval NULL if an error occurred.
2015-05-26 10:38:56 +02:00
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static char* build_column_file_name(const char* column_name, obiversion_t version_number);
2015-06-10 15:19:02 +02:00
2015-05-26 10:38:56 +02:00
/**
* @brief Internal function building the file name for a column version file.
2015-05-26 10:38:56 +02:00
*
* The column version file indicates the latest version number for a column.
* This function returns the name of the file storing this information.
2015-05-26 10:38:56 +02:00
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column_name The name of the OBIDMS column.
2015-05-26 10:38:56 +02:00
*
* @returns A pointer to the version file name.
* @retval NULL if an error occurred.
2015-05-26 10:38:56 +02:00
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static char* build_version_file_name(const char* column_name);
2015-05-26 10:38:56 +02:00
2015-06-10 15:19:02 +02:00
2015-05-26 10:38:56 +02:00
/**
* @brief Internal function returning a new column version number
* in the OBIDMS database.
2015-05-26 10:38:56 +02:00
*
* @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory().
* @param block Whether the call is blocking or not:
* - `true` the call is blocking
* - `false` the call is not blocking.
2015-05-26 10:38:56 +02:00
*
* @returns The next version number for this column.
* @retval -1 if an error occurred.
2015-05-26 10:38:56 +02:00
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block);
2015-05-26 10:38:56 +02:00
2015-06-10 15:19:02 +02:00
2015-05-26 10:38:56 +02:00
/**
* @brief Internal function creating a new column version file
* in the OBIDMS database.
2015-05-26 10:38:56 +02:00
*
* The new file is initialized with the minimum version number `0`.
*
* @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory().
2015-05-26 10:38:56 +02:00
*
* @returns The next usable version number for this column : `0`.
* @retval -1 if an error occurred.
2015-05-26 10:38:56 +02:00
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory);
2015-05-26 10:38:56 +02:00
2015-06-10 15:19:02 +02:00
/**
* @brief Internal function setting the elements names of the lines of a
* column in the header of the OBIDMS column structure.
*
* @param column A pointer as returned by obi_create_column().
* @param elements_names The names of the elements with ';' as separator.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names);
/**
* @brief Internal function computing how many lines of an OBIDMS column
* fit in a memory page.
*
* @param data_type The data OBIType.
* @param nb_elements_per_line The number of elements per line.
*
* @returns The line count for one memory page.
*
* @since September 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line);
2015-06-10 15:19:02 +02:00
/************************************************************************
2015-05-26 10:38:56 +02:00
*
2015-06-10 15:19:02 +02:00
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
2015-05-26 10:38:56 +02:00
*
2015-06-10 15:19:02 +02:00
************************************************************************/
2015-05-26 10:38:56 +02:00
/*
 * Builds the file name of a column file: "<column_name>@<version_number>.odc".
 *
 * The returned string is allocated with malloc() and has to be freed by the caller.
 * Returns NULL, after setting the OBI errno, if the name can not be formatted or if
 * the memory can not be allocated.
 */
static char* build_column_file_name(const char* column_name, obiversion_t version_number)
{
	char* file_name;
	int   name_length;

	// Let snprintf() compute the exact length of the formatted name. Deriving it from
	// log10(version_number) is undefined for negative version numbers and does not
	// account for the sign character, which would overflow the buffer.
	name_length = snprintf(NULL, 0, "%s@%d.odc", column_name, version_number);
	if (name_length < 0)
	{
		obi_set_errno(OBICOL_MEMORY_ERROR);
		obidebug(1, "\nError building a column file name");
		return NULL;
	}

	// Allocate the name plus its terminating null character
	file_name = (char*) malloc(((size_t) name_length + 1) * sizeof(char));
	if (file_name == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating the memory for a column file name");
		return NULL;
	}

	// Build the file name
	if (snprintf(file_name, (size_t) name_length + 1, "%s@%d.odc", column_name, version_number) != name_length)
	{
		obi_set_errno(OBICOL_MEMORY_ERROR);
		obidebug(1, "\nError building a column file name");
		free(file_name);	// do not leak the buffer on failure
		return NULL;
	}

	return file_name;
}
static char* build_version_file_name(const char* column_name)
{
char* file_name;
2015-05-26 10:38:56 +02:00
// Build the file name
2016-03-21 11:33:06 +01:00
file_name = (char*) malloc((strlen(column_name) + 5)*sizeof(char));
if (file_name == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating the memory for a version file name");
return NULL;
}
2016-03-21 11:33:06 +01:00
if (sprintf(file_name,"%s.odv", column_name) < 0)
2015-05-26 10:38:56 +02:00
{
obi_set_errno(OBICOL_MEMORY_ERROR);
obidebug(1, "\nError building a version file name");
2015-05-26 10:38:56 +02:00
return NULL;
}
return file_name;
2015-05-26 10:38:56 +02:00
}
2015-06-10 15:19:02 +02:00
/*
 * Reads the version number stored in the version file of a column directory,
 * increments it, writes it back and returns the incremented value.
 *
 * The first sizeof(obiversion_t) bytes of the file are locked with lockf() while the
 * number is read and rewritten; `block` selects a blocking (F_LOCK) or non-blocking
 * (F_TLOCK) lock. If the version file does not exist yet, it is created through
 * create_version_file() and the value returned by that function is returned.
 *
 * Returns -1 if an error occurred.
 */
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block)
{
	off_t        loc_size;
	obiversion_t new_version_number;
	char*        version_file_name;
	int          version_file_descriptor;
	int          lock_mode;
	int          open_errno;

	new_version_number = 0;
	loc_size = sizeof(obiversion_t);

	// Select the correct lockf operation according to the blocking mode
	lock_mode = block ? F_LOCK : F_TLOCK;

	// Build the version file name
	version_file_name = build_version_file_name(column_directory->column_name);
	if (version_file_name == NULL)
		return -1;

	// Open the version file
	version_file_descriptor = openat(column_directory->dir_fd, version_file_name, O_RDWR);
	if (version_file_descriptor < 0)
	{
		// Save errno right away: the calls below are allowed to modify it
		open_errno = errno;

		if (open_errno == ENOENT)
		{
			// No version file yet: create it. The name is freed first so that it is
			// not leaked on this path.
			free(version_file_name);
			return create_version_file(column_directory);
		}

		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError opening a version file");
		free(version_file_name);
		return -1;
	}

	free(version_file_name);

	// Test if the version file size is ok
	if (lseek(version_file_descriptor, 0, SEEK_END) < loc_size)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError testing if a version file size is ok");
		goto error;
	}

	// Reset offset to 0: lockf() locks a section starting at the current offset
	if (lseek(version_file_descriptor, 0, SEEK_SET) != 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError positioning offset in version file");
		goto error;
	}

	// Lock the file
	if (lockf(version_file_descriptor, lock_mode, loc_size) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError locking a version file");
		goto error;
	}

	// Read the current version number
	if (read(version_file_descriptor, &new_version_number, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t)))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError reading a version file");
		goto error;
	}

	new_version_number++;

	// Reset offset to 0 to write the new version number
	if (lseek(version_file_descriptor, 0, SEEK_SET) != 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError positioning offset in version file");
		goto error;
	}

	// Write the new version number
	if (write(version_file_descriptor, &new_version_number, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t)))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError writing a new version number in a version file");
		goto error;
	}

	// Reset offset to 0 so that the unlock below covers the section that was locked
	if (lseek(version_file_descriptor, 0, SEEK_SET) != 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError positioning offset in version file");
		goto error;
	}

	// Unlock the file
	if (lockf(version_file_descriptor, F_ULOCK, loc_size) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError unlocking a version file");
		goto error;
	}

	close(version_file_descriptor);

	return new_version_number;

error:
	// Closing the descriptor also releases any lock still held through lockf()
	close(version_file_descriptor);
	return -1;
}
2015-06-10 15:19:02 +02:00
/*
 * Creates the version file of a column directory and stores the version number 0
 * in it.
 *
 * The file is created exclusively (the call fails if it already exists), locked
 * with lockf() while it is sized and written, then unlocked and closed.
 *
 * Returns the version number written (0), or -1 if an error occurred.
 */
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory)
{
	off_t        locked_length   = sizeof(obiversion_t);
	obiversion_t initial_version = 0;
	char*        file_name;
	int          fd;

	file_name = build_version_file_name(column_directory->column_name);
	if (file_name == NULL)
		return -1;

	// Create the version file and get its file descriptor
	fd = openat(column_directory->dir_fd, file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
	if (fd < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError opening a version file");
		free(file_name);
		return -1;
	}

	free(file_name);

	// Take the lock on the section that will hold the version number
	if (lockf(fd, F_LOCK, locked_length) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError locking a version file");
		goto failure;
	}

	// Give the file the size of exactly one version number
	if (ftruncate(fd, locked_length) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError truncating a version file");
		goto failure;
	}

	// Go to the beginning of the file before writing
	if (lseek(fd, 0, SEEK_SET) != 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError changing offset of a version file");
		goto failure;
	}

	// Store the initial version number
	if (write(fd, &initial_version, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t)))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError writing version number in a version file");
		goto failure;
	}

	// Go back to the beginning of the file so that the unlock covers the locked section
	if (lseek(fd, 0, SEEK_SET) != 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError preparing a version file for unlocking");
		goto failure;
	}

	// Release the lock
	if (lockf(fd, F_ULOCK, locked_length) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError unlocking a version file");
		goto failure;
	}

	close(fd);

	return initial_version;

failure:
	close(fd);
	return -1;
}
2015-06-10 15:19:02 +02:00
/*
 * Copies the elements names (';'-separated) into the header of a column.
 *
 * Returns 0 on success. Returns -1, after setting the OBI errno, if an argument is
 * NULL or if the names do not fit in the header field, instead of dereferencing NULL
 * or writing past the end of the field.
 */
static int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names)
{
	size_t capacity;

	if ((column == NULL) || (column->header == NULL) || (elements_names == NULL))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError setting the elements names of a column: no column or no names given");
		return -1;
	}

	// NOTE(review): assumes header->elements_names is a fixed-size char array (like the
	// other string fields of the memory-mapped header), so that sizeof gives its
	// capacity -- confirm against the declaration of the header structure.
	capacity = sizeof((column->header)->elements_names);

	if (strlen(elements_names) >= capacity)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError setting the elements names of a column: names too long for the header");
		return -1;
	}

	strcpy((column->header)->elements_names, elements_names);

	return 0;
}
/*
 * Computes how many lines of `nb_elements_per_line` elements of type `data_type`
 * fit in one memory page (as given by getpagesize()).
 *
 * Returns 0 when a line is larger than a page, and also when the number of elements
 * per line or the element size is not positive, instead of dividing by zero.
 */
static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
{
	size_t element_size;

	if (nb_elements_per_line <= 0)
		return 0;

	element_size = obi_sizeof(data_type);
	if (element_size == 0)
		return 0;

	return getpagesize() / (element_size * nb_elements_per_line);
}
/**********************************************************************
2015-05-26 10:38:56 +02:00
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
2015-05-26 10:38:56 +02:00
*
**********************************************************************/
/*
 * Returns the version number currently stored in the version file of a column
 * directory, without modifying it.
 *
 * The version file is opened read-only; its size is checked to be at least
 * sizeof(obiversion_t) before the number is read from its beginning.
 *
 * Returns -1 if an error occurred.
 */
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory)
{
	off_t        expected_size = sizeof(obiversion_t);
	obiversion_t stored_version = 0;
	char*        file_name;
	int          fd;

	file_name = build_version_file_name(column_directory->column_name);
	if (file_name == NULL)
		return -1;

	// Open the version file read-only
	fd = openat(column_directory->dir_fd, file_name, O_RDONLY);
	if (fd < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError opening a version file");
		free(file_name);
		return -1;
	}

	free(file_name);

	// The file has to be large enough to contain a version number
	if (lseek(fd, 0, SEEK_END) < expected_size)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError testing if a version file size is ok");
		goto failure;
	}

	// Go back to the beginning of the file
	if (lseek(fd, 0, SEEK_SET) != 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError setting the offset of a version file to 0");
		goto failure;
	}

	// Read the stored version number
	if (read(fd, &stored_version, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t)))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError reading the latest version number in a version file");
		goto failure;
	}

	close(fd);

	return stored_version;

failure:
	close(fd);
	return -1;
}
2015-06-10 15:19:02 +02:00
/*
 * Returns the latest version number of the column called `column_name` in `dms`.
 *
 * The column directory is opened with obi_open_column_directory() and the number is
 * read with obi_get_latest_version_number().
 *
 * Returns -1 if the directory can not be opened or if the version can not be read.
 *
 * NOTE(review): the column directory structure obtained here is not closed by this
 * function -- confirm who owns it.
 */
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name)
{
	OBIDMS_column_directory_p directory;
	obiversion_t              version;

	// Open the directory of the column
	directory = obi_open_column_directory(dms, column_name);
	if (directory == NULL)
	{
		obidebug(1, "\nProblem opening a column directory structure");
		return -1;
	}

	// Read the latest version number from its version file
	version = obi_get_latest_version_number(directory);
	if (version >= 0)
		return version;

	obidebug(1, "\nProblem getting the latest version number in a column directory");
	return -1;
}
2015-05-26 10:38:56 +02:00
/*
 * Returns the size of a column header on this platform: sizeof(OBIDMS_column_header_t)
 * rounded up to the next multiple of the memory page size.
 *
 * The rounding is done with integer arithmetic, which is exact for every size_t
 * value and avoids the round trip through double and ceil().
 */
size_t obi_get_platform_header_size()
{
	size_t page_size;
	size_t header_size;
	size_t page_count;

	page_size   = (size_t) getpagesize();
	header_size = sizeof(OBIDMS_column_header_t);

	// Number of pages needed to hold the header (integer ceiling division)
	page_count = (header_size + page_size - 1) / page_size;

	return page_count * page_size;
}
/*
 * Counts the non-empty ';'-separated names in a string. Gives the same count as a
 * strtok() loop on ";" but neither copies nor modifies the string.
 */
static index_t count_semicolon_separated_names(const char* names)
{
	index_t count   = 0;
	bool    in_name = false;

	for (; *names != '\0'; names++)
	{
		if (*names == ';')
			in_name = false;
		else if (!in_name)
		{
			in_name = true;
			count++;
		}
	}

	return count;
}


/**
 * @brief Creates a new version of a column: creates its file, maps its header and
 *        its data in memory, fills the header and initializes the data to NA values.
 *
 * All the arguments are validated before a new version number is requested, so that
 * invalid arguments do not consume a version number.
 *
 * @param dms                  The OBIDMS in which the column is created.
 * @param column_name          The name of the column.
 * @param data_type            The OBIType of the data (accepted values: 1 to 7).
 * @param nb_lines             The number of lines wanted; raised to the number of lines
 *                             fitting in one memory page if smaller, refused if greater
 *                             than MAXIMUM_LINE_COUNT.
 * @param nb_elements_per_line The number of elements per line, at least 1.
 * @param elements_names       The names of the elements with ';' as separator. With one
 *                             element per line it has to be equal to the column name;
 *                             NULL is then accepted and stands for the column name.
 * @param indexer_name         For OBI_STR, OBI_SEQ and OBI_QUAL columns, the name of the
 *                             indexer to use; NULL or "" to have a name built by
 *                             obi_build_indexer_name().
 * @param comments             Optional comments copied in the header (can be NULL).
 *
 * @returns A pointer on the new column, writable and with a counter of 1.
 * @retval NULL if an error occurred.
 */
OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
                                  const char*  column_name,
                                  OBIType_t    data_type,
                                  index_t      nb_lines,
                                  index_t      nb_elements_per_line,
                                  const char*  elements_names,
                                  const char*  indexer_name,
                                  const char*  comments
                                 )
{
	OBIDMS_column_p           new_column             = NULL;
	OBIDMS_column_directory_p column_directory;
	OBIDMS_column_header_p    header;
	size_t                    file_size;
	obiversion_t              version_number;
	char*                     column_file_name;
	int                       column_file_descriptor = -1;
	size_t                    header_size            = 0;
	size_t                    data_size              = 0;
	index_t                   minimum_line_count;
	OBIType_t                 returned_data_type;
	OBIType_t                 stored_data_type;
	char*                     final_indexer_name     = NULL;
	bool                      indexed;
	bool                      header_mapped          = false;
	bool                      data_mapped            = false;

	// Check that the informations given are not NULL/invalid/greater than the allowed sizes
	if (dms == NULL)
	{
		obidebug(1, "\nCan't create column because of invalid DMS");
		return NULL;
	}
	if (column_name == NULL)
	{
		obidebug(1, "\nCan't create column because of empty column name");
		return NULL;
	}
	if ((data_type < 1) || (data_type > 7))
	{
		obidebug(1, "\nCan't create column because of invalid data type");
		return NULL;
	}
	if (nb_elements_per_line < 1)
	{
		// Would otherwise lead to a division by zero or to an empty mapping
		obidebug(1, "\nCan't create column because of invalid number of elements per line");
		return NULL;
	}

	// For OBI_STR, OBI_SEQ and OBI_QUAL, the stored data is indices referring to data stored elsewhere
	returned_data_type = data_type;
	indexed = (data_type == OBI_STR) || (data_type == OBI_SEQ) || (data_type == OBI_QUAL);
	if (indexed)
		stored_data_type = OBI_IDX;
	else
		stored_data_type = returned_data_type;

	// The initial line count should be between the minimum (corresponding to the page size) and the maximum allowed
	minimum_line_count = get_line_count_per_page(stored_data_type, nb_elements_per_line);
	if (nb_lines > MAXIMUM_LINE_COUNT)
	{
		obidebug(1, "\nCan't create column because of line count greater than the maximum allowed (%d)", MAXIMUM_LINE_COUNT);
		return NULL;
	}
	else if (nb_lines < minimum_line_count)
		nb_lines = minimum_line_count;

	// The number of elements names should be equal to the number of elements per line
	if (nb_elements_per_line > 1)
	{
		if (elements_names == NULL)
		{
			obidebug(1, "\nCan't create column because no elements names were given for a number of elements per line greater than 1");
			return NULL;
		}
		if (count_semicolon_separated_names(elements_names) != nb_elements_per_line)
		{
			obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
			return NULL;
		}
	}
	else if (elements_names == NULL)
	{
		// With one element per line the only accepted element name is the column name
		elements_names = column_name;
	}
	else if (strcmp(elements_names, column_name) != 0)
	{
		obidebug(1, "\nCan't create column because the element name does not match the column name");
		return NULL;
	}

	// Get the column directory structure associated to the column
	column_directory = obi_column_directory(dms, column_name);
	if (column_directory == NULL)
	{
		obi_set_errno(OBICOLDIR_UNKNOWN_ERROR);
		obidebug(1, "\nError opening a column directory structure");
		return NULL;
	}

	// Get the new version number
	version_number = obi_get_new_version_number(column_directory, true);
	if (version_number < 0)
	{
		return NULL;
	}

	// Build the indexer name if needed (a private heap copy in both cases, freed before returning)
	if (indexed)
	{
		if ((indexer_name == NULL) || (strcmp(indexer_name, "") == 0))
		{
			final_indexer_name = obi_build_indexer_name(column_name, version_number);
			if (final_indexer_name == NULL)
				return NULL;
		}
		else
		{
			final_indexer_name = (char*) malloc((strlen(indexer_name)+1)*sizeof(char));
			if (final_indexer_name == NULL)
			{
				obi_set_errno(OBI_MALLOC_ERROR);
				obidebug(1, "\nError allocating the memory for an indexer name");
				return NULL;
			}
			strcpy(final_indexer_name, indexer_name);
		}
	}

	// Calculate the size needed
	header_size = obi_get_platform_header_size();
	data_size = obi_array_sizeof(stored_data_type, nb_lines, nb_elements_per_line);
	file_size = header_size + data_size;

	// Get the column file name
	column_file_name = build_column_file_name(column_name, version_number);
	if (column_file_name == NULL)
		goto error;

	// Open the column file
	column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
	if (column_file_descriptor < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError opening a column file %s", column_file_name);
		free(column_file_name);
		goto error;
	}

	free(column_file_name);

	// Truncate the column file to the right size
	if (ftruncate(column_file_descriptor, file_size) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError truncating a column file to the right size");
		goto error;
	}

	// Allocate the memory for the column structure
	new_column = (OBIDMS_column_p) malloc(sizeof(OBIDMS_column_t));
	if (new_column == NULL)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError allocating the memory for the column structure");
		goto error;
	}

	// Fill the column structure
	new_column->dms = dms;
	new_column->column_directory = column_directory;
	new_column->indexer = NULL;

	new_column->header = mmap(NULL,
	                          header_size,
	                          PROT_READ | PROT_WRITE,
	                          MAP_SHARED,
	                          column_file_descriptor,
	                          0
	                         );
	if (new_column->header == MAP_FAILED)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError mmapping the header of a column");
		goto error;
	}
	header_mapped = true;

	new_column->data = mmap(NULL,
	                        data_size,
	                        PROT_READ | PROT_WRITE,
	                        MAP_SHARED,
	                        column_file_descriptor,
	                        header_size
	                       );
	if (new_column->data == MAP_FAILED)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError mmapping the data of a column");
		goto error;
	}
	data_mapped = true;

	new_column->writable = true;

	// Fill the header
	header = new_column->header;
	header->header_size = header_size;
	header->data_size = data_size;
	header->line_count = nb_lines;
	header->lines_used = 0;
	header->nb_elements_per_line = nb_elements_per_line;
	header->stored_data_type = stored_data_type;
	header->returned_data_type = returned_data_type;
	header->creation_date = time(NULL);
	header->version = version_number;
	header->cloned_from = -1;

	if (obi_column_set_elements_names(new_column, elements_names) < 0)
	{
		obidebug(1, "\nError setting the elements names of a column");
		goto error;
	}

	strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME);

	if (comments != NULL)
		strncpy(header->comments, comments, COMMENTS_MAX_LENGTH);

	// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated obi_indexer is opened or created
	if (indexed)
	{
		new_column->indexer = obi_indexer(dms, final_indexer_name);
		if (new_column->indexer == NULL)
		{
			obidebug(1, "\nError opening or creating the indexer associated with a column");
			goto error;
		}
		strncpy(header->indexer_name, final_indexer_name, INDEXER_MAX_NAME);
	}

	// Fill the data with NA values
	obi_ini_to_NA_values(new_column, 0, nb_lines);

	close(column_file_descriptor);

	// Add in the list of opened columns
	obi_dms_list_column(dms, new_column);

	// Set counter to 1 // TODO Discuss counters
	new_column->counter = 1;

	// The name has been copied in the header.
	// NOTE(review): assumes obi_indexer() does not keep the pointer it is given -- confirm.
	free(final_indexer_name);

	return new_column;

error:
	// Release, in reverse order, everything that was acquired before the failure
	if (data_mapped)
		munmap(new_column->data, data_size);
	if (header_mapped)
		munmap(new_column->header, header_size);
	if (column_file_descriptor >= 0)
		close(column_file_descriptor);
	free(new_column);
	free(final_indexer_name);
	return NULL;
}
/**
 * @brief Opens a column in read-only mode.
 *
 * If the requested version of the column is already in the list of opened columns of
 * the DMS, its counter is incremented and it is returned. Otherwise the column file
 * is opened read-only, its header and its data are mapped in memory, the indexer is
 * opened for OBI_STR, OBI_SEQ and OBI_QUAL columns, and the column is added to the
 * list of opened columns with a counter of 1.
 *
 * @param dms            The OBIDMS containing the column.
 * @param column_name    The name of the column.
 * @param version_number The version to open, or -1 to open the latest version.
 *
 * @returns A pointer on the opened column.
 * @retval NULL if an error occurred.
 */
OBIDMS_column_p obi_open_column(OBIDMS_p     dms,
                                const char*  column_name,
                                obiversion_t version_number)
{
	OBIDMS_column_p           column                 = NULL;
	OBIDMS_column_directory_p column_directory;
	char*                     column_file_name;
	int                       column_file_descriptor = -1;
	size_t                    header_size            = 0;
	size_t                    data_size              = 0;
	bool                      header_mapped          = false;
	bool                      data_mapped            = false;

	// Get the column directory structure associated to the column
	column_directory = obi_open_column_directory(dms, column_name);
	if (column_directory == NULL)
	{
		obidebug(1, "\nError opening a column directory structure");
		return NULL;
	}

	// Get the latest version number if it has the value -1 (not given by user)
	if (version_number == -1)
	{
		version_number = obi_get_latest_version_number(column_directory);
		if (version_number < 0)
		{
			obidebug(1, "\nError getting the latest version number in a column directory");
			return NULL;
		}
	}

	// Check if the column is already in the list of opened columns
	column = obi_dms_get_column_from_list(dms, column_name, version_number);
	// If it's found, increment its counter and return it
	if (column != NULL)
	{
		(column->counter)++;
		return column;
	}

	// Get the column file name
	column_file_name = build_column_file_name(column_name, version_number);
	if (column_file_name == NULL)
	{
		return NULL;
	}

	// Open the column file, ALWAYS READ-ONLY
	column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY);
	if (column_file_descriptor < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError opening column file");
		free(column_file_name);
		return NULL;
	}

	free(column_file_name);

	// Allocate the memory for the column structure
	column = (OBIDMS_column_p) malloc(sizeof(OBIDMS_column_t));
	if (column == NULL)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError allocating the memory for a column structure");
		goto error;
	}

	// Read the header size, stored at the very beginning of the file
	if (read(column_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t)))
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError reading the header size to open a column");
		goto error;
	}

	// Fill the column structure
	column->dms = dms;
	column->column_directory = column_directory;
	column->indexer = NULL;

	column->header = mmap(NULL,
	                      header_size,
	                      PROT_READ,
	                      MAP_SHARED,
	                      column_file_descriptor,
	                      0
	                     );
	if (column->header == MAP_FAILED)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError mmapping the header of a column");
		goto error;
	}
	header_mapped = true;

	// Map the data
	data_size = (column->header)->data_size;
	column->data = mmap(NULL,
	                    data_size,
	                    PROT_READ,
	                    MAP_SHARED,
	                    column_file_descriptor,
	                    header_size
	                   );
	if (column->data == MAP_FAILED)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError mmapping the data of a column");
		goto error;
	}
	data_mapped = true;

	column->writable = false;

	// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is opened
	if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL))
	{
		column->indexer = obi_open_indexer(dms, (column->header)->indexer_name);
		if (column->indexer == NULL)
		{
			obidebug(1, "\nError opening the indexer associated with a column");
			goto error;
		}
	}

	close(column_file_descriptor);

	// Add in the list of opened columns
	obi_dms_list_column(dms, column);

	// Set counter to 1
	column->counter = 1;

	return column;

error:
	// Release, in reverse order, everything that was acquired before the failure
	// (this includes the data mapping, which used to be leaked when the indexer
	// could not be opened)
	if (data_mapped)
		munmap(column->data, data_size);
	if (header_mapped)
		munmap(column->header, header_size);
	close(column_file_descriptor);
	free(column);
	return NULL;
}
/**
 * @brief Creates a new version of a column from an existing version of it.
 *
 * The column to clone is opened, a new column with the same name, data type, number
 * of elements per line, elements names, indexer name and comments is created, and
 * the `cloned_from` field of its header is set to the version that was cloned.
 *
 * @param dms            The OBIDMS containing the column.
 * @param line_selection If not NULL and if clone_data is true, a column whose data is
 *                       read as an array of index_t: the lines of the column to clone
 *                       found at those indices are copied, in that order.
 * @param column_name    The name of the column to clone.
 * @param version_number The version to clone (as accepted by obi_open_column()).
 * @param clone_data     Whether the data has to be copied in the new column.
 *
 * @returns A pointer on the new column.
 * @retval NULL if an error occurred.
 */
OBIDMS_column_p obi_clone_column(OBIDMS_p        dms,
                                 OBIDMS_column_p line_selection,
                                 const char*     column_name,
                                 obiversion_t    version_number,
                                 bool            clone_data)
{
	OBIDMS_column_p column_to_clone;
	OBIDMS_column_p new_column;
	index_t         nb_lines;
	index_t         nb_elements_per_line;
	OBIType_t       data_type;
	size_t          line_size;
	index_t         selected_line_count;
	index_t         i, index;

	column_to_clone = obi_open_column(dms, column_name, version_number);
	if (column_to_clone == NULL)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError opening the column to clone");
		return NULL;
	}

	data_type = (column_to_clone->header)->returned_data_type;

	nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line;

	if (clone_data)
		nb_lines = (column_to_clone->header)->line_count;
	else
		nb_lines = get_line_count_per_page(data_type, nb_elements_per_line);	// minimum line count corresponding to one memory page

	new_column = obi_create_column(dms,
	                               column_name,
	                               data_type,
	                               nb_lines,
	                               nb_elements_per_line,
	                               (column_to_clone->header)->elements_names,
	                               (column_to_clone->header)->indexer_name,
	                               (column_to_clone->header)->comments
	                              );

	if (new_column == NULL)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError creating the new column when cloning a column");
		// No file is removed here: the only file name that can be built from
		// (column_name, version_number) is the one of the column being cloned, which
		// must not be deleted. Return instead of dereferencing the NULL new column.
		if (obi_close_column(column_to_clone) < 0)
			obidebug(1, "\nError closing a column that has been cloned");
		return NULL;
	}

	(new_column->header)->cloned_from = (column_to_clone->header)->version;

	if (clone_data && (line_selection == NULL))
	{
		memcpy(new_column->data, column_to_clone->data, (column_to_clone->header)->data_size);
		(new_column->header)->lines_used = (column_to_clone->header)->lines_used;
	}
	else if (clone_data && (line_selection != NULL))
	{
		line_size = obi_sizeof((new_column->header)->stored_data_type) * (new_column->header)->nb_elements_per_line;
		selected_line_count = (line_selection->header)->lines_used;

		// The selected lines have to fit in the lines allocated for the new column
		if (selected_line_count > (new_column->header)->line_count)
		{
			obi_set_errno(OBICOL_UNKNOWN_ERROR);
			obidebug(1, "\nError cloning a column: the line selection is larger than the new column");
			goto error;
		}

		for (i=0; i<selected_line_count; i++)
		{
			index = *(((index_t*) (line_selection->data)) + i);

			// Never read outside of the data mapped for the column to clone
			if ((index < 0) || (index >= (column_to_clone->header)->line_count))
			{
				obi_set_errno(OBICOL_UNKNOWN_ERROR);
				obidebug(1, "\nError cloning a column: line index out of range in the line selection");
				goto error;
			}

			memcpy(((char*) (new_column->data))+(i*line_size), ((char*) (column_to_clone->data))+(index*line_size), line_size);
		}
		(new_column->header)->lines_used = selected_line_count;
	}

	// Close column_to_clone
	if (obi_close_column(column_to_clone) < 0)
	{
		obidebug(1, "\nError closing a column that has been cloned");
		// TODO return NULL or not?
	}

	return new_column;

error:
	// Both columns were opened by this function: release them.
	// The file of the new column is left on disk.
	if (obi_close_column(new_column) < 0)
		obidebug(1, "\nError closing the new column after a cloning error");
	if (obi_close_column(column_to_clone) < 0)
		obidebug(1, "\nError closing a column that has been cloned");
	return NULL;
}
/**
 * @brief Closes a column.
 *
 * A writable column is first truncated to the number of lines used. The counter of
 * the column is then decremented; when it reaches 0, the column is removed from the
 * list of opened columns, its indexer is closed (OBI_STR, OBI_SEQ and OBI_QUAL
 * columns), its data and its header are unmapped, its column directory is closed if
 * no other opened column has the same name, and the column structure is freed.
 *
 * @param column The column to close.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int obi_close_column(OBIDMS_column_p column)
{
	bool close_dir;
	int  ret_val = 0;

	// Truncate the column to the number of lines used if it's not read-only
	if (column->writable)
		ret_val = obi_truncate_column(column);

	(column->counter)--;

	// The column is still in use elsewhere: nothing else to release
	if (column->counter != 0)
		return ret_val;

	// Delete from the list of opened columns
	if (obi_dms_unlist_column(column->dms, column) < 0)
		ret_val = -1;

	// Close the column directory only if it was the last column opened from that
	// directory, i.e. if no column with that name is left in the list now that this
	// one has been removed from it. (The test used to be inverted.)
	// NOTE(review): relies on obi_dms_is_column_name_in_list() returning true when the
	// name is still in the list -- confirm against its definition.
	// The name is read from the header here, before the header is unmapped.
	close_dir = !obi_dms_is_column_name_in_list(column->dms, (column->header)->name);

	// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed
	if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL))
	{
		if (obi_close_indexer(column->indexer) < 0)
			ret_val = -1;
	}

	// Munmap data (before the header, which holds the data size)
	if (munmap(column->data, (column->header)->data_size) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError munmapping column data");
		ret_val = -1;
	}

	// Munmap header
	if (munmap(column->header, (column->header)->header_size) < 0)
	{
		obi_set_errno(OBICOL_UNKNOWN_ERROR);
		obidebug(1, "\nError munmapping a column header");
		ret_val = -1;
	}

	// Close column directory
	if (close_dir)
	{
		if (obi_close_column_directory(column->column_directory) < 0)
			ret_val = -1;
	}

	free(column);

	return ret_val;
}
/**
 * @brief Resizes the file of a column to the smallest whole number of memory
 *        pages able to hold the lines used (at least one line is kept).
 *
 * The data is unmapped, the file is truncated and the data is mapped again
 * with its new size; the line count and data size stored in the header are
 * then updated.
 *
 * @warning If truncating or re-mapping fails, the previous data mapping has
 *          already been released: column->data must not be used afterwards.
 *
 * @param column A pointer on the column to truncate.
 *
 * @retval 0 if the operation succeeded or if the file already had the right size.
 * @retval -1 if an error occurred.
 */
int obi_truncate_column(OBIDMS_column_p column)  // TODO is it necessary to unmap/remap?
{
    size_t  file_size;
    size_t  data_size;
    size_t  line_size;
    size_t  page_size;
    size_t  page_count;
    index_t lines_to_keep;
    index_t new_line_count;
    int     column_file_descriptor;
    char*   column_file_name;

    // Size in bytes of one line; it is used as a divisor below so it can not be 0
    line_size = ((size_t) (column->header)->nb_elements_per_line) * obi_sizeof((column->header)->stored_data_type);
    if (line_size == 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating a column: the size of a line is 0");
        return -1;
    }

    // Compute the new line count: the number of lines fitting in the lines used
    // (at least 1) rounded up to a whole number of pages.
    // Everything is computed with size_t so that columns of 2 GB and more
    // do not overflow an int.
    lines_to_keep  = ONE_IF_ZERO((column->header)->lines_used);
    page_size      = (size_t) getpagesize();
    page_count     = ((((size_t) lines_to_keep) * line_size) + page_size - 1) / page_size;
    new_line_count = (index_t) ((page_count * page_size) / line_size);

    // Nothing to do if the file already holds exactly that number of lines
    if ((column->header)->line_count == new_line_count)
        return 0;

    // Get the column file name
    column_file_name = build_column_file_name((column->header)->name, (column->header)->version);
    if (column_file_name == NULL)
    {
        return -1;
    }

    // Open the column file
    column_file_descriptor = openat((column->column_directory)->dir_fd, column_file_name, O_RDWR);
    if (column_file_descriptor < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError getting the file descriptor of a column file");
        free(column_file_name);
        return -1;
    }

    free(column_file_name);

    // Unmap the data before truncating the file
    if (munmap(column->data, (column->header)->data_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError munmapping the data of a column before truncating");
        close(column_file_descriptor);
        return -1;
    }

    // Truncate the column file
    data_size = obi_array_sizeof((column->header)->stored_data_type, new_line_count, (column->header)->nb_elements_per_line);
    file_size = (column->header)->header_size + data_size;

    if (ftruncate(column_file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating a column file at the number of lines used");
        close(column_file_descriptor);
        return -1;
    }

    // Remap the data, which starts right after the header in the file
    column->data = mmap(NULL,
                        data_size,
                        PROT_READ | PROT_WRITE,
                        MAP_SHARED,
                        column_file_descriptor,
                        (column->header)->header_size
                       );

    if (column->data == MAP_FAILED)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError re-mmapping the data of a column after truncating");
        close(column_file_descriptor);
        return -1;
    }

    // Set new line_count and new data size
    (column->header)->line_count = new_line_count;
    (column->header)->data_size = data_size;

    // The mapping stays valid after the file descriptor is closed
    close(column_file_descriptor);

    return 0;
}
/**
 * @brief Grows the file of a writable column by COLUMN_GROWTH_FACTOR and
 *        maps the data again with its new size.
 *
 * Both the line count and the data size are multiplied by
 * COLUMN_GROWTH_FACTOR. The lines added are initialized to the NA value of
 * the stored data type.
 *
 * @param column A pointer on the column to enlarge.
 *
 * @retval 0 if the operation succeeded.
 * @retval -1 if the column is read-only, if the new line count would exceed
 *         MAXIMUM_LINE_COUNT, or if a system call failed.
 */
int obi_enlarge_column(OBIDMS_column_p column)
{
    char*   file_name;
    int     fd;
    index_t previous_line_count;
    index_t enlarged_line_count;
    size_t  previous_data_size;
    size_t  enlarged_data_size;
    size_t  header_bytes;
    size_t  enlarged_file_size;

    // Only columns opened for writing can grow
    if (!(column->writable))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError trying to enlarge a read-only column");
        return -1;
    }

    // Build the name of the file holding this version of the column
    file_name = build_column_file_name((column->header)->name, (column->header)->version);
    if (file_name == NULL)
    {
        return -1;
    }

    // Open that file relatively to the column directory
    fd = openat((column->column_directory)->dir_fd, file_name, O_RDWR);
    if (fd < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError getting the file descriptor of a column file");
        free(file_name);
        return -1;
    }

    free(file_name);

    // Work out the line count after growth and refuse to go past the limit
    previous_line_count = (column->header)->line_count;
    enlarged_line_count = previous_line_count * COLUMN_GROWTH_FACTOR;

    if (enlarged_line_count > MAXIMUM_LINE_COUNT)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError enlarging a column file: new line count greater than the maximum allowed");
        close(fd);
        return -1;
    }

    // The data size grows by the same factor; the header keeps its size
    previous_data_size = (column->header)->data_size;
    enlarged_data_size = previous_data_size * COLUMN_GROWTH_FACTOR;
    header_bytes       = (column->header)->header_size;
    enlarged_file_size = header_bytes + enlarged_data_size;

    // Extend the file 	// TODO isn't it possible that this makes the file "move"?
    if (ftruncate(fd, enlarged_file_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError enlarging a column file");
        close(fd);
        return -1;
    }

    // Drop the mapping of the old data area...
    if (munmap(column->data, previous_data_size) < 0)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError munmapping the data of a column before enlarging");
        close(fd);
        return -1;
    }

    // ... and map the enlarged one, located right after the header in the file
    column->data = mmap(NULL,
                        enlarged_data_size,
                        PROT_READ | PROT_WRITE,
                        MAP_SHARED,
                        fd,
                        header_bytes
                       );

    if (column->data == MAP_FAILED)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError re-mmapping the data of a column after enlarging the file");
        close(fd);
        return -1;
    }

    // Record the new dimensions in the header
    (column->header)->line_count = enlarged_line_count;
    (column->header)->data_size  = enlarged_data_size;

    // Fill the lines that were just added with NA values
    obi_ini_to_NA_values(column, previous_line_count, enlarged_line_count - previous_line_count);

    close(fd);

    return 0;
}
/**
 * @brief Sets a range of lines of a column to the NA value of its stored
 *        data type.
 *
 * Every element of the nb_lines lines starting at first_line_nb is
 * overwritten. Nothing is written for OBI_VOID columns.
 *
 * @param column A pointer on the column to initialize.
 * @param first_line_nb The number of the first line to initialize.
 * @param nb_lines The number of lines to initialize.
 */
void obi_ini_to_NA_values(OBIDMS_column_p column,
                          index_t first_line_nb,
                          index_t nb_lines)
{
    index_t pos;
    index_t first_element;
    index_t element_count;
    index_t past_last_element;

    // Convert the range of lines into a range of elements
    first_element     = first_line_nb*((column->header)->nb_elements_per_line);
    element_count     = nb_lines*((column->header)->nb_elements_per_line);
    past_last_element = first_element + element_count;

    switch ((column->header)->stored_data_type)
    {
        case OBI_VOID:  // TODO;
            break;

        case OBI_INT:
        {
            obiint_t* int_values = (obiint_t*) (column->data);
            for (pos = first_element; pos < past_last_element; pos++)
                int_values[pos] = OBIInt_NA;
            break;
        }

        case OBI_FLOAT:
        {
            obifloat_t* float_values = (obifloat_t*) (column->data);
            for (pos = first_element; pos < past_last_element; pos++)
                float_values[pos] = OBIFloat_NA;
            break;
        }

        case OBI_BOOL:
        {
            obibool_t* bool_values = (obibool_t*) (column->data);
            for (pos = first_element; pos < past_last_element; pos++)
                bool_values[pos] = OBIBool_NA;
            break;
        }

        case OBI_CHAR:
        {
            obichar_t* char_values = (obichar_t*) (column->data);
            for (pos = first_element; pos < past_last_element; pos++)
                char_values[pos] = OBIChar_NA;
            break;
        }

        // Index-typed storage. The OBI_QUAL, OBI_STR and OBI_SEQ labels are
        // noted as unused by the original code because those types are only
        // returned data types; they are kept and handled like OBI_IDX.
        case OBI_IDX:
        case OBI_QUAL:
        case OBI_STR:
        case OBI_SEQ:
        {
            index_t* idx_values = (index_t*) (column->data);
            for (pos = first_element; pos < past_last_element; pos++)
                idx_values[pos] = OBIIdx_NA;
            break;
        }

        default:
            break;
    }
}
/**
 * @brief Maps, read-only, the header of a column identified by its name and
 *        version number.
 *
 * @warning The returned header is a memory mapping; obi_close_header()
 *          releases it with munmap().
 *
 * @param dms A pointer on the OBIDMS containing the column.
 * @param column_name The name of the column.
 * @param version_number The version of the column, or a negative value to
 *                       use the latest version found in the column directory.
 *
 * @returns A pointer on the mapped header.
 * @retval NULL if an error occurred.
 */
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number)
{
    OBIDMS_column_directory_p directory;
    OBIDMS_column_header_p    mapped_header;
    char*                     file_name;
    int                       fd;
    size_t                    header_bytes;
    ssize_t                   bytes_read;

    // Open the directory holding every version of the column.
    // NOTE(review): this handle is not closed anywhere in this function;
    // confirm who owns it before adding a close.
    directory = obi_open_column_directory(dms, column_name);
    if (directory == NULL)
    {
        obidebug(1, "\nError opening a column directory structure");
        return NULL;
    }

    // A negative version number means "use the latest one"
    if (version_number < 0)
    {
        version_number = obi_get_latest_version_number(directory);
        if (version_number < 0)
        {
            obidebug(1, "\nError getting the latest version number in a column directory");
            return NULL;
        }
    }

    // Build the name of the file for that version
    file_name = build_column_file_name(column_name, version_number);
    if (file_name == NULL)
    {
        return NULL;
    }

    // Open the column file, reading only
    fd = openat(directory->dir_fd, file_name, O_RDONLY);
    if (fd < 0)
    {
        obidebug(1, "\nError opening a column file");
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        free(file_name);
        return NULL;
    }

    free(file_name);

    // The file starts with the size of the header, needed to map it
    bytes_read = read(fd, &header_bytes, sizeof(size_t));
    if (bytes_read < ((ssize_t) sizeof(size_t)))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError reading the header size to read a header");
        close(fd);
        return NULL;
    }

    // Map the header from the beginning of the file
    mapped_header = mmap(NULL,
                         header_bytes,
                         PROT_READ,
                         MAP_SHARED,
                         fd,
                         0
                        );

    if (mapped_header == MAP_FAILED)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError mmapping the header of a column");
        close(fd);
        return NULL;
    }

    // The mapping remains usable once the descriptor is closed
    close(fd);

    return mapped_header;
}
/**
 * @brief Unmaps a column header, using the header size stored in the header
 *        itself as the length of the mapping.
 *
 * @param header A pointer on the mapped header to release.
 *
 * @retval 0 if the operation succeeded.
 * @retval -1 if munmap() failed.
 */
int obi_close_header(OBIDMS_column_header_p header)
{
    // Success is the short path
    if (munmap(header, header->header_size) >= 0)
        return 0;

    obi_set_errno(OBICOL_UNKNOWN_ERROR);
    obidebug(1, "\nError munmapping a column header");

    return -1;
}
/**
 * @brief Returns the index of an element of a column from its name.
 *
 * The names of the elements are read from the ';'-separated list stored in
 * the column header. The index is the position of the name in that list,
 * starting at 0; empty names (consecutive, leading or trailing ';') are
 * skipped and not counted, as strtok() would do.
 *
 * The list is scanned in place: nothing is allocated and no static state is
 * used, and a name that is absent from the list is reported instead of
 * being compared against a NULL token.
 *
 * @param column A pointer on the column.
 * @param element_name The name of the element to look for.
 *
 * @returns The index of the element in a line of the column.
 * @retval OBIIdx_NA if the name was not found.
 */
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
{
    const char* cursor;
    size_t      wanted_length;
    size_t      name_length;
    index_t     element_index;

    cursor        = (column->header)->elements_names;
    wanted_length = strlen(element_name);
    element_index = 0;

    while (*cursor != '\0')
    {
        // Length of the name starting at the cursor (up to the next ';' or the end)
        name_length = strcspn(cursor, ";");

        if (name_length > 0)
        {
            // Lengths are compared first so that a prefix does not match
            if ((name_length == wanted_length) && (strncmp(cursor, element_name, name_length) == 0))
                return element_index;

            element_index++;
        }

        // Jump over the name and over the separator following it, if any
        cursor += name_length;
        if (*cursor == ';')
            cursor++;
    }

    obidebug(1, "\nCan't find an element name");

    return OBIIdx_NA;
}
/**
 * @brief Gets a column ready for a value to be written at a given line.
 *
 * The column is enlarged as many times as needed for the line to exist,
 * and the number of lines used is raised to include it.
 *
 * @param column A pointer on the column.
 * @param line_nb The number of the line about to be written.
 *
 * @retval 0 if the line can be written.
 * @retval -1 if the column is read-only, if the line number reaches
 *         MAXIMUM_LINE_COUNT, or if the column could not be enlarged.
 */
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
{
    index_t lines_needed;

    // Writing is refused on read-only columns
    if (!(column->writable))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError trying to set a value in a read-only column");
        return -1;
    }

    // The line number has to stay under the maximum line count
    if (line_nb >= MAXIMUM_LINE_COUNT)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
        return -1;
    }

    // Number of lines the column must hold for line_nb to be valid
    lines_needed = line_nb + 1;

    // Grow the file until it holds enough lines
    while (lines_needed > (column->header)->line_count)
    {
        if (obi_enlarge_column(column) < 0)
            return -1;
    }

    // Raise the count of lines used if this line is beyond it
    if (lines_needed > (column->header)->lines_used)
        (column->header)->lines_used = lines_needed;

    return 0;
}
/**
 * @brief Checks that a line exists in a column before a value is read from it.
 *
 * @param column A pointer on the column.
 * @param line_nb The number of the line about to be read.
 *
 * @retval 0 if the line is within the line count of the column.
 * @retval -1 if the line is beyond the line count of the column.
 */
// TODO problem with some columns in a view being empty or shorter and
// triggering an error because they've been truncated when the view was
// closed. Fixed with obiview.c in update_lines() for now
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
{
    // The line exists: nothing else to check
    if ((line_nb+1) <= ((column->header)->line_count))
        return 0;

    obi_set_errno(OBICOL_UNKNOWN_ERROR);
    obidebug(1, "\nError trying to get a value that is beyond the current number of lines of the column");

    return -1;
}