/**************************************************************************** * OBIDMS columns functions * ****************************************************************************/ /** * @file obidmscolumn.c * @author Celine Mercier (celine.mercier@metabarcoding.org) * @date 22 May 2015 * @brief Functions shared by all the OBIDMS columns. */ #include #include #include #include #include #include #include #include #include #include #include #include "obidmscolumn.h" #include "obidmscolumndir.h" #include "obidms.h" #include "obitypes.h" #include "obierrno.h" #include "obidebug.h" #include "obilittlebigman.h" #include "obiblob_indexer.h" #include "utils.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) /************************************************************************** * * D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S * **************************************************************************/ /** * @brief Internal function building the file name for a column. * * The function builds the file name corresponding to a column of an OBIDMS. * * @warning The returned pointer has to be freed by the caller. * * @param column_name The name of the OBIDMS column file. * @param version_number The version number of the OBIDMS column file. * * @returns A pointer to the column file name. * @retval NULL if an error occurred. * * @since May 2015 * @author Eric Coissac (eric.coissac@metabarcoding.org) */ static char* build_column_file_name(const char* column_name, obiversion_t version_number); /** * @brief Internal function building the file name for a column version file. * * The column version file indicates the latest version number for a column. * This function returns the name of the file storing this information. * * @warning The returned pointer has to be freed by the caller. * * @param column_name The name of the OBIDMS column. * * @returns A pointer to the version file name. * @retval NULL if an error occurred. * * @since May 2015 * @author Eric Coissac (eric.coissac@metabarcoding.org) */ static char* build_version_file_name(const char* column_name); /** * @brief Internal function returning a new column version number * in the OBIDMS database. * * @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory(). * @param block Whether the call is blocking or not: * - `true` the call is blocking * - `false` the call is not blocking. * * @returns The next version number for this column. * @retval -1 if an error occurred. * * @since May 2015 * @author Eric Coissac (eric.coissac@metabarcoding.org) */ static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block); /** * @brief Internal function creating a new column version file * in the OBIDMS database. * * The new file is initialized with the minimum version number `0`. * * @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory(). * * @returns The next usable version number for this column : `0`. * @retval -1 if an error occurred. * * @since May 2015 * @author Eric Coissac (eric.coissac@metabarcoding.org) */ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory); /** * @brief Internal function setting the elements names of the lines of a * column in the header of the OBIDMS column structure. * * @param column A pointer as returned by obi_create_column(). * @param elements_names The names of the elements with ';' as separator. * * @retval 0 if the operation was successfully completed. * @retval -1 if an error occurred. * * @since July 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names); /** * @brief Internal function computing how many lines of an OBIDMS column * fit in a memory page. * * @param data_type The data OBIType. * @param nb_elements_per_line The number of elements per line. * * @returns The line count for one memory page. * * @since September 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line); /************************************************************************ * * D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S * ************************************************************************/ static char* build_column_file_name(const char* column_name, obiversion_t version_number) { char* file_name; int version_number_length; // Build the file name version_number_length = (version_number == 0 ? 1 : (int)(log10(version_number)+1)); file_name = (char*) malloc((strlen(column_name) + version_number_length + 6)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a column file name"); return NULL; } if (sprintf(file_name,"%s@%d.odc", column_name, version_number) < 0) { obi_set_errno(OBICOL_MEMORY_ERROR); obidebug(1, "\nError building a column file name"); return NULL; } return file_name; } static char* build_version_file_name(const char* column_name) { char* file_name; // Build the file name file_name = (char*) malloc((strlen(column_name) + 5)*sizeof(char)); if (file_name == NULL) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for a version file name"); return NULL; } if (sprintf(file_name,"%s.odv", column_name) < 0) { obi_set_errno(OBICOL_MEMORY_ERROR); obidebug(1, "\nError building a version file name"); return NULL; } return file_name; } static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block) { off_t loc_size; obiversion_t new_version_number; char* version_file_name; int version_file_descriptor; int lock_mode; new_version_number = 0; loc_size = sizeof(obiversion_t); // Select the correct lockf operation according to the blocking mode if (block) lock_mode=F_LOCK; else lock_mode=F_TLOCK; // Build the version file name version_file_name = build_version_file_name(column_directory->column_name); if (version_file_name == NULL) return -1; // Open the version file version_file_descriptor = openat(column_directory->dir_fd, version_file_name, O_RDWR); if (version_file_descriptor < 0) { if (errno == ENOENT) return create_version_file(column_directory); else { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError opening a version file"); free(version_file_name); return -1; } } free(version_file_name); // Test if the version file size is ok if (lseek(version_file_descriptor, 0, SEEK_END) < loc_size) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError testing if a version file size is ok"); close(version_file_descriptor); return -1; } // Reset offset to 0 if (lseek(version_file_descriptor, 0, SEEK_SET) != 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError positioning offset in version file"); close(version_file_descriptor); return -1; } // Lock the file if (lockf(version_file_descriptor, lock_mode, loc_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError locking a version file"); close(version_file_descriptor); return -1; } // Read the current version number if (read(version_file_descriptor, &new_version_number, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t))) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError reading a version file"); close(version_file_descriptor); return -1; } new_version_number++; // Reset offset to 0 to write the new version number if (lseek(version_file_descriptor, 0, SEEK_SET) != 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError positioning offset in version file"); close(version_file_descriptor); return -1; } // Write the new version number if (write(version_file_descriptor, &new_version_number, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t))) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError writing a new version number in a version file"); close(version_file_descriptor); return -1; } // Reset offset to 0 (TODO: why?) if (lseek(version_file_descriptor, 0, SEEK_SET) != 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError positioning offset in version file"); close(version_file_descriptor); return -1; } // Unlock the file if (lockf(version_file_descriptor, F_ULOCK, loc_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError unlocking a version file"); close(version_file_descriptor); return -1; } if (close(version_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError closing a version file"); return -1; } return new_version_number; } static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory) { off_t loc_size; obiversion_t version_number; char* version_file_name; int version_file_descriptor; loc_size = sizeof(obiversion_t); version_number = 0; version_file_name = build_version_file_name(column_directory->column_name); if (version_file_name == NULL) return -1; // Get the file descriptor associated to the version file version_file_descriptor = openat(column_directory->dir_fd, version_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (version_file_descriptor < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError opening a version file"); free(version_file_name); return -1; } free(version_file_name); // Lock the file if (lockf(version_file_descriptor, F_LOCK, loc_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError locking a version file"); close(version_file_descriptor); return -1; } // Truncate the version file to the right size if (ftruncate(version_file_descriptor, loc_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError truncating a version file"); close(version_file_descriptor); return -1; } // Position offset to 0 to prepare for writing // TODO Unnecessary? if (lseek(version_file_descriptor, 0, SEEK_SET) != 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError changing offset of a version file"); close(version_file_descriptor); return -1; } // Write version number if (write(version_file_descriptor, &version_number, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t))) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError writing version number in a version file"); close(version_file_descriptor); return -1; } // Prepare for unlocking if (lseek(version_file_descriptor, 0, SEEK_SET) != 0) // TODO Unnecessary? { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError preparing a version file for unlocking"); close(version_file_descriptor); return -1; } // Unlock the file if (lockf(version_file_descriptor, F_ULOCK, loc_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError unlocking a version file"); close(version_file_descriptor); return -1; } if (close(version_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError closing a version file"); return -1; } return version_number; } int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names) { strcpy((column->header)->elements_names, elements_names); return 0; } index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line) { return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line); } /********************************************************************** * * D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S * **********************************************************************/ obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory) { off_t loc_size; obiversion_t latest_version_number; char * version_file_name; int version_file_descriptor; loc_size = sizeof(obiversion_t); latest_version_number = 0; version_file_name = build_version_file_name(column_directory->column_name); if (version_file_name==NULL) return -1; // Get the file descriptor associated to the version file version_file_descriptor = openat(column_directory->dir_fd, version_file_name, O_RDONLY); if (version_file_descriptor < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError opening a version file"); free(version_file_name); return -1; } free(version_file_name); // Check that the version file size is ok if (lseek(version_file_descriptor, 0, SEEK_END) < loc_size) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError testing if a version file size is ok"); close(version_file_descriptor); return -1; } // Set the offset to 0 in the version file if (lseek(version_file_descriptor, 0, SEEK_SET) != 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError setting the offset of a version file to 0"); close(version_file_descriptor); return -1; } // Read the latest version number if (read(version_file_descriptor, &latest_version_number, sizeof(obiversion_t)) < ((ssize_t) sizeof(obiversion_t))) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError reading the latest version number in a version file"); close(version_file_descriptor); return -1; } if (close(version_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError closing a version file"); return -1; } return latest_version_number; } obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name) { OBIDMS_column_directory_p column_directory; obiversion_t latest_version; // Get the column directory structure associated to the column column_directory = obi_open_column_directory(dms, column_name); if (column_directory == NULL) { obidebug(1, "\nProblem opening a column directory structure"); return -1; } // Get the latest version number latest_version = obi_get_latest_version_number(column_directory); if (latest_version < 0) { obidebug(1, "\nProblem getting the latest version number in a column directory"); return -1; } return latest_version; } size_t obi_get_platform_header_size() { size_t header_size; size_t rounded_header_size; double multiple; header_size = sizeof(OBIDMS_column_header_t); multiple = ceil((double) header_size / (double) getpagesize()); rounded_header_size = multiple * getpagesize(); return rounded_header_size; } OBIDMS_column_p obi_create_column(OBIDMS_p dms, const char* column_name, OBIType_t data_type, index_t nb_lines, index_t nb_elements_per_line, const char* elements_names, const char* indexer_name, const char* associated_column_name, obiversion_t associated_column_version, const char* comments ) { OBIDMS_column_p new_column; OBIDMS_column_directory_p column_directory; OBIDMS_column_header_p header; size_t file_size; obiversion_t version_number; char* column_file_name; int column_file_descriptor; size_t header_size; size_t data_size; index_t minimum_line_count; OBIType_t returned_data_type; OBIType_t stored_data_type; char* final_indexer_name; new_column = NULL; // Check that the informations given are not NULL/invalid/greater than the allowed sizes if (dms == NULL) { obidebug(1, "\nCan't create column because of invalid DMS"); return NULL; } if (column_name == NULL) { obidebug(1, "\nCan't create column because of empty column name"); return NULL; } if ((data_type < 1) || (data_type > 8)) // TODO check in more robust way { obidebug(1, "\nCan't create column because of invalid data type"); return NULL; } // Get the column directory structure associated to the column column_directory = obi_column_directory(dms, column_name); if (column_directory == NULL) { obi_set_errno(OBICOLDIR_UNKNOWN_ERROR); obidebug(1, "\nError opening a column directory structure"); return NULL; } // Get the latest version number version_number = obi_get_new_version_number(column_directory, true); if (version_number < 0) { return NULL; } // Build the indexer name if needed if ((data_type == OBI_STR) || (data_type == OBI_SEQ) || (data_type == OBI_QUAL)) { if (strcmp(indexer_name, "") == 0) { final_indexer_name = obi_build_indexer_name(column_name, version_number); if (final_indexer_name == NULL) return NULL; } else { final_indexer_name = (char*) malloc((strlen(indexer_name)+1)*sizeof(char)); strcpy(final_indexer_name, indexer_name); } } returned_data_type = data_type; if ((data_type == OBI_STR) || (data_type == OBI_SEQ) || (data_type == OBI_QUAL)) // stored data is indices referring to data stored elsewhere stored_data_type = OBI_IDX; else stored_data_type = returned_data_type; // The initial line count should be between the minimum (corresponding to the page size) and the maximum allowed minimum_line_count = get_line_count_per_page(stored_data_type, nb_elements_per_line); if (nb_lines > MAXIMUM_LINE_COUNT) { obidebug(1, "\nCan't create column because of line count greater than the maximum allowed (%d)", MAXIMUM_LINE_COUNT); return NULL; } else if (nb_lines < minimum_line_count) nb_lines = minimum_line_count; // The number of elements names should be equal to the number of elements per line if ((elements_names == NULL) && (nb_elements_per_line > 1)) { obidebug(1, "\nCan't create column because no elements names were given for a number of elements per line greater than 1"); return NULL; } else if ((elements_names != NULL) && (nb_elements_per_line > 1)) { char* token; index_t n = 0; token = strdup(elements_names); token = strtok(token, ";"); while (token != NULL) { token = strtok(NULL, ";"); n++; } if (n != nb_elements_per_line) { obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line"); return NULL; } } else if ((nb_elements_per_line == 1) && (strcmp(elements_names, column_name) != 0)) { obidebug(1, "\nCan't create column because the element name does not match the column name"); return NULL; } // Calculate the size needed header_size = obi_get_platform_header_size(); data_size = obi_array_sizeof(stored_data_type, nb_lines, nb_elements_per_line); file_size = header_size + data_size; // Get the column file name column_file_name = build_column_file_name(column_name, version_number); if (column_file_name == NULL) { return NULL; } // Open the column file column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (column_file_descriptor < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError opening a column file %s", column_file_name); free(column_file_name); return NULL; } free(column_file_name); // Truncate the column file to the right size if (ftruncate(column_file_descriptor, file_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError truncating a column file to the right size"); close(column_file_descriptor); return NULL; } // Allocate the memory for the column structure new_column = (OBIDMS_column_p) malloc(sizeof(OBIDMS_column_t)); if (new_column == NULL) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError allocating the memory for the column structure"); close(column_file_descriptor); return NULL; } // Fill the column structure new_column->dms = dms; new_column->column_directory = column_directory; new_column->header = mmap(NULL, header_size, PROT_READ | PROT_WRITE, MAP_SHARED, column_file_descriptor, 0 ); if (new_column->header == MAP_FAILED) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError mmapping the header of a column"); close(column_file_descriptor); free(new_column); return NULL; } new_column->data = mmap(NULL, data_size, PROT_READ | PROT_WRITE, MAP_SHARED, column_file_descriptor, header_size ); if (new_column->data == MAP_FAILED) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError mmapping the data of a column"); munmap(new_column->header, header_size); close(column_file_descriptor); free(new_column); return NULL; } new_column->writable = true; header = new_column->header; header->header_size = header_size; header->data_size = data_size; header->line_count = nb_lines; header->lines_used = 0; header->nb_elements_per_line = nb_elements_per_line; header->stored_data_type = stored_data_type; header->returned_data_type = returned_data_type; header->creation_date = time(NULL); header->version = version_number; header->cloned_from = -1; obi_column_set_elements_names(new_column, elements_names); strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME); if (comments != NULL) strncpy(header->comments, comments, COMMENTS_MAX_LENGTH); // Store the associated column reference if needed // TODO discuss cases if (data_type == OBI_QUAL) { if (associated_column_name == NULL) { obidebug(1, "\nError: The name of the associated column when creating a new column is NULL"); munmap(new_column->header, header_size); close(column_file_descriptor); free(new_column); return NULL; } strcpy((header->associated_column).column_name, associated_column_name); if (associated_column_version == -1) { obidebug(1, "\nError: The version of the associated column when creating a new column is not defined"); munmap(new_column->header, header_size); close(column_file_descriptor); free(new_column); return NULL; } (header->associated_column).version = associated_column_version; } // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated obi_indexer is opened or created if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ) || (returned_data_type == OBI_QUAL)) { new_column->indexer = obi_indexer(dms, final_indexer_name); if (new_column->indexer == NULL) { obidebug(1, "\nError opening or creating the indexer associated with a column"); munmap(new_column->header, header_size); close(column_file_descriptor); free(new_column); return NULL; } strncpy(header->indexer_name, final_indexer_name, INDEXER_MAX_NAME); } // Fill the data with NA values obi_ini_to_NA_values(new_column, 0, nb_lines); if (close(column_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError closing a column file"); return NULL; } // Add in the list of opened columns obi_dms_list_column(dms, new_column); // Set counter to 1 // TODO Discuss counters new_column->counter = 1; return new_column; } OBIDMS_column_p obi_open_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number) { OBIDMS_column_p column; OBIDMS_column_directory_p column_directory; char* column_file_name; int column_file_descriptor; size_t header_size; column = NULL; // Get the column directory structure associated to the column column_directory = obi_open_column_directory(dms, column_name); if (column_directory == NULL) { obidebug(1, "\nError opening a column directory structure"); return NULL; } // Get the latest version number if it has the value -1 (not given by user) if (version_number == -1) { version_number = obi_get_latest_version_number(column_directory); if (version_number < 0) { obidebug(1, "\nError getting the latest version number in a column directory"); return NULL; } } // Check if the column is already in the list of opened columns column = obi_dms_get_column_from_list(dms, column_name, version_number); // If it's found, increment its counter and return it if (column != NULL) { (column->counter)++; return column; } // Get the column file name column_file_name = build_column_file_name(column_name, version_number); if (column_file_name == NULL) { return NULL; } // Open the column file, ALWAYS READ-ONLY column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY); if (column_file_descriptor < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError opening column file"); free(column_file_name); return NULL; } free(column_file_name); // Allocate the memory for the column structure column = (OBIDMS_column_p) malloc(sizeof(OBIDMS_column_t)); if (column == NULL) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError allocating the memory for a column structure"); close(column_file_descriptor); return NULL; } // Read the header size if (read(column_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t))) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError reading the header size to open a column"); close(column_file_descriptor); free(column); return NULL; } // Fill the column structure column->dms = dms; column->column_directory = column_directory; column->header = mmap(NULL, header_size, PROT_READ, MAP_SHARED, column_file_descriptor, 0 ); if (column->header == MAP_FAILED) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError mmapping the header of a column"); close(column_file_descriptor); free(column); return NULL; } // Map the data column->data = mmap(NULL, (column->header)->data_size, PROT_READ, MAP_SHARED, column_file_descriptor, header_size ); if (column->data == MAP_FAILED) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError mmapping the data of a column"); munmap(column->header, header_size); close(column_file_descriptor); free(column); return NULL; } column->writable = false; // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is opened if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL)) { column->indexer = obi_open_indexer(dms, (column->header)->indexer_name); if (column->indexer == NULL) { obidebug(1, "\nError opening the indexer associated with a column"); munmap(column->header, header_size); close(column_file_descriptor); free(column); return NULL; } } if (close(column_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError closing a column file"); return NULL; } // Add in the list of opened columns obi_dms_list_column(dms, column); // Set counter to 1 column->counter = 1; return column; } OBIDMS_column_p obi_clone_column(OBIDMS_p dms, OBIDMS_column_p line_selection, const char* column_name, obiversion_t version_number, bool clone_data) { OBIDMS_column_p column_to_clone; OBIDMS_column_p new_column; index_t nb_lines; index_t nb_elements_per_line; OBIType_t data_type; size_t line_size; index_t i, index; column_to_clone = obi_open_column(dms, column_name, version_number); if (column_to_clone == NULL) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError opening the column to clone"); return NULL; } data_type = (column_to_clone->header)->returned_data_type; nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line; if (clone_data) nb_lines = (column_to_clone->header)->line_count; else nb_lines = get_line_count_per_page(data_type, nb_elements_per_line); // minimum line count corresponding to one memory page new_column = obi_create_column(dms, column_name, data_type, nb_lines, nb_elements_per_line, (column_to_clone->header)->elements_names, (column_to_clone->header)->indexer_name, ((column_to_clone->header)->associated_column).column_name, ((column_to_clone->header)->associated_column).version, (column_to_clone->header)->comments ); if (new_column == NULL) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError creating the new column when cloning a column"); // The new file is deleted TODO check if it exists before //const char* column_file_name = build_column_file_name(column_name, version_number); //if (remove(column_file_name) < 0) // obidebug(1, "\nError deleting a bad cloned file"); return NULL; } (new_column->header)->cloned_from = (column_to_clone->header)->version; if (clone_data && (line_selection == NULL)) { memcpy(new_column->data, column_to_clone->data, (column_to_clone->header)->data_size); (new_column->header)->lines_used = (column_to_clone->header)->lines_used; } else if (clone_data && (line_selection != NULL)) { line_size = obi_sizeof((new_column->header)->stored_data_type) * (new_column->header)->nb_elements_per_line; for (i=0; i<((line_selection->header)->lines_used); i++) { index = *(((index_t*) (line_selection->data)) + i); memcpy((new_column->data)+(i*line_size), (column_to_clone->data)+(index*line_size), line_size); } (new_column->header)->lines_used = (line_selection->header)->lines_used; } // Close column_to_clone if (obi_close_column(column_to_clone) < 0) { obidebug(1, "\nError closing a column that has been cloned"); // TODO return NULL or not? } return new_column; } int obi_close_column(OBIDMS_column_p column) { int ret_val = 0; // Truncate the column to the number of lines used if it's not read-only if (column->writable) ret_val = obi_truncate_column(column); (column->counter)--; if (column->counter == 0) { // Delete from the list of opened columns if (obi_dms_unlist_column(column->dms, column) < 0) ret_val = -1; // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL)) if (obi_close_indexer(column->indexer) < 0) ret_val = -1; // Munmap data if (munmap(column->data, (column->header)->data_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError munmapping column data"); ret_val = -1; } // Munmap header if (munmap(column->header, (column->header)->header_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError munmapping a column header"); ret_val = -1; } // Close column directory if (obi_close_column_directory(column->column_directory) < 0) ret_val = -1; free(column); } return ret_val; } int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap/remap? { size_t file_size; size_t data_size; index_t new_line_count; double multiple; int column_file_descriptor; char* column_file_name; // Compute the new line count = the number of lines used rounded to the nearest greater multiple of page size greater than 0 multiple = ceil((double) (ONE_IF_ZERO((column->header)->lines_used) * (column->header)->nb_elements_per_line * obi_sizeof((column->header)->stored_data_type)) / (double) getpagesize()); new_line_count = floor((((int) multiple) * getpagesize()) / ((column->header)->nb_elements_per_line * obi_sizeof((column->header)->stored_data_type))); // Check that it is actually greater than the current number of lines allocated in the file, otherwise no need to truncate if ((column->header)->line_count == new_line_count) return 0; // Get the column file name column_file_name = build_column_file_name((column->header)->name, (column->header)->version); if (column_file_name == NULL) { return -1; } // Open the column file column_file_descriptor = openat((column->column_directory)->dir_fd, column_file_name, O_RDWR); if (column_file_descriptor < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError getting the file descriptor of a column file"); free(column_file_name); return -1; } free(column_file_name); // Unmap the data before truncating the file if (munmap(column->data, (column->header)->data_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError munmapping the data of a column before truncating"); close(column_file_descriptor); return -1; } // Truncate the column file data_size = obi_array_sizeof((column->header)->stored_data_type, new_line_count, (column->header)->nb_elements_per_line); file_size = (column->header)->header_size + data_size; if (ftruncate(column_file_descriptor, file_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError truncating a column file at the number of lines used"); close(column_file_descriptor); return -1; } // Remap the data column->data = mmap(NULL, data_size, PROT_READ | PROT_WRITE, MAP_SHARED, column_file_descriptor, (column->header)->header_size ); if (column->data == MAP_FAILED) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError re-mmapping the data of a column after truncating"); close(column_file_descriptor); return -1; } // Set new line_count and new data size (column->header)->line_count = new_line_count; (column->header)->data_size = data_size; if (close(column_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError closing a column file"); return -1; } return 0; } int obi_enlarge_column(OBIDMS_column_p column) { size_t file_size; size_t old_data_size; size_t new_data_size; size_t header_size; index_t old_line_count; index_t new_line_count; int column_file_descriptor; char* column_file_name; // Check if the column is read-only if (!(column->writable)) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError trying to enlarge a read-only column"); return -1; } // Get the column file name column_file_name = build_column_file_name((column->header)->name, (column->header)->version); if (column_file_name == NULL) { return -1; } // Open the column file column_file_descriptor = openat((column->column_directory)->dir_fd, column_file_name, O_RDWR); if (column_file_descriptor < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError getting the file descriptor of a column file"); free(column_file_name); return -1; } free(column_file_name); // Calculate the new file size old_line_count = (column->header)->line_count; new_line_count = old_line_count * COLUMN_GROWTH_FACTOR; if (new_line_count > MAXIMUM_LINE_COUNT) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError enlarging a column file: new line count greater than the maximum allowed"); close(column_file_descriptor); return -1; } old_data_size = (column->header)->data_size; new_data_size = old_data_size * COLUMN_GROWTH_FACTOR; header_size = (column->header)->header_size; file_size = header_size + new_data_size; // Enlarge the file // TODO isn't it possible that this makes the file "move"? if (ftruncate(column_file_descriptor, file_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError enlarging a column file"); close(column_file_descriptor); return -1; } // Unmap and remap the data if (munmap(column->data, old_data_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError munmapping the data of a column before enlarging"); close(column_file_descriptor); return -1; } column->data = mmap(NULL, new_data_size, PROT_READ | PROT_WRITE, MAP_SHARED, column_file_descriptor, header_size ); if (column->data == MAP_FAILED) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError re-mmapping the data of a column after enlarging the file"); close(column_file_descriptor); return -1; } // Set new line count and new data size (column->header)->line_count = new_line_count; (column->header)->data_size = new_data_size; // Initialize new data lines to NA obi_ini_to_NA_values(column, old_line_count, new_line_count - old_line_count); if (close(column_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError closing a column file"); return -1; } return 0; } void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t nb_lines) { index_t i, start, end, nb_elements; nb_elements = nb_lines*((column->header)->nb_elements_per_line); start = first_line_nb*((column->header)->nb_elements_per_line); end = start + nb_elements; switch ((column->header)->stored_data_type) { case OBI_VOID: // TODO; break; case OBI_INT: for (i=start;idata)) + i) = OBIInt_NA; } break; case OBI_FLOAT: for (i=start;idata)) + i) = OBIFloat_NA; } break; case OBI_BOOL: for (i=start;idata)) + i) = OBIBool_NA; } break; case OBI_CHAR: for (i=start;idata)) + i) = OBIChar_NA; } break; case OBI_IDX: for (i=start;idata)) + i) = OBIIdx_NA; } break; case OBI_QUAL: for (i=start;idata)) + i) = OBIIdx_NA; } break; case OBI_STR: for (i=start;idata)) + i) = OBIIdx_NA; } break; case OBI_SEQ: for (i=start;idata)) + i) = OBIIdx_NA; } break; } } OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number) { OBIDMS_column_header_p header; OBIDMS_column_directory_p column_directory; char* column_file_name; int column_file_descriptor; size_t header_size; // Get the column directory structure associated to the column column_directory = obi_open_column_directory(dms, column_name); if (column_directory == NULL) { obidebug(1, "\nError opening a column directory structure"); return NULL; } // Get the latest version number if not provided if (version_number < 0) { version_number = obi_get_latest_version_number(column_directory); if (version_number < 0) { obidebug(1, "\nError getting the latest version number in a column directory"); return NULL; } } // Get the column file name column_file_name = build_column_file_name(column_name, version_number); if (column_file_name == NULL) { return NULL; } // Open the column file (READ-ONLY) column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY); if (column_file_descriptor < 0) { obidebug(1, "\nError opening a column file"); obi_set_errno(OBICOL_UNKNOWN_ERROR); free(column_file_name); return NULL; } free(column_file_name); // Read the header size if (read(column_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t))) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError reading the header size to read a header"); close(column_file_descriptor); return NULL; } // Fill the header structure header = mmap(NULL, header_size, PROT_READ, MAP_SHARED, column_file_descriptor, 0 ); if (header == MAP_FAILED) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError mmapping the header of a column"); close(column_file_descriptor); return NULL; } if (close(column_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError closing a column file"); return NULL; } return header; } int obi_close_header(OBIDMS_column_header_p header) { if (munmap(header, header->header_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError munmapping a column header"); return -1; } return 0; } // TODO to be rewritten in an optimized and safe way if possible index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name) { char* elements_names; char* name; index_t element_index; elements_names = strdup((column->header)->elements_names); if (elements_names == NULL) { obidebug(1, "\nError strdup-ing the elements names"); return OBIIdx_NA; } element_index = 0; name = strtok(elements_names, ";"); // TODO not thread safe, see strtok_r maybe if (strcmp(element_name, name) == 0) { free(elements_names); return element_index; } element_index++; while (name != NULL) { name = strtok(NULL, ";"); // TODO not thread safe, see strtok_r maybe if (strcmp(element_name, name) == 0) { free(elements_names); return element_index; } element_index++; } obidebug(1, "\nCan't find an element name"); free(elements_names); return OBIIdx_NA; } int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb) { // Check if the column is read-only if (!(column->writable)) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError trying to set a value in a read-only column"); return -1; } // Check that the line number is not greater than the maximum allowed if (line_nb >= MAXIMUM_LINE_COUNT) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed"); return -1; } // Check if the file needs to be enlarged while ((line_nb+1) > (column->header)->line_count) { // Enlarge the file if (obi_enlarge_column(column) < 0) return -1; } // Update lines used if ((line_nb+1) > (column->header)->lines_used) (column->header)->lines_used = line_nb+1; return 0; } int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb) // TODO problem with some columns in a view being empty or shorter and triggering an error because they've been truncated when the view was closed. Fixed with obiview.c in update_lines() for now { if ((line_nb+1) > ((column->header)->line_count)) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError trying to get a value that is beyond the current number of lines of the column"); return -1; } return 0; }