Files
obitools3/src/obidmscolumn.h
2021-03-10 16:50:30 +13:00

608 lines
23 KiB
C
Executable File

/****************************************************************************
* OBIDMS columns header file *
****************************************************************************/
/**
* @file obidmscolumn.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 12 May 2015
* @brief Header file for the functions and structures shared by all the OBIDMS columns.
*/
#ifndef OBIDMSCOLUMN_H_
#define OBIDMSCOLUMN_H_
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdbool.h>
#include <time.h>
#include <stdbool.h>
#include "obidms.h"
#include "obitypes.h"
#include "obierrno.h"
#include "obilittlebigman.h"
#include "obidmscolumndir.h"
#include "obiblob_indexer.h"
// TODO delete useless ones, add default nb?
#define ELEMENTS_NAMES_MAX (1000000) /**< The maximum length of the list of elements names. // TODO Discuss
*/
#define NB_ELTS_MAX_IF_DEFAULT_NAME (1000000) /**< The maximum number of elements per line if the default element names
* are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX. // TODO not up to date
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column (1E9). //TODO
*/
#define COMMENTS_MAX_LENGTH (4096) /**< The maximum length for comments.
*/
#define FORMATTED_ELT_NAMES_SEPARATOR '\0' /**< The separator between elements names once formatted to be stored in columns.
*/
#define NOT_FORMATTED_ELT_NAMES_SEPARATOR ';' /**< The separator between elements names before being formatted to be stored in columns (e.g. as sent by the upper layer).
*/
/**
* @brief Structure referencing a column by its name and its version.
*/
typedef struct Column_reference {
char column_name[OBIDMS_COLUMN_MAX_NAME+1]; /**< Name of the column.
*/
obiversion_t version; /**< Version of the column.
*/
} Column_reference_t, *Column_reference_p;
/**
* @brief OBIDMS column header structure.
*/
typedef struct OBIDMS_column_header {
size_t header_size; /**< Size of the header in bytes.
*/
size_t data_size; /**< Size of the data in bytes.
*/
index_t line_count; /**< Number of lines of data allocated.
*/
index_t lines_used; /**< Number of lines of data used (the highest index where data has been entered + 1).
*/
index_t nb_elements_per_line; /**< Number of elements per line.
*/
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
* element from the column.
*/
OBIType_t stored_data_type; /**< Type of the data that is actually stored in the data
* part of the column.
*/
bool dict_column; /**< Whether the column contains dictionary-like values.
*/
bool tuples; /**< A boolean indicating whether the column contains indices referring to indexed tuples.
*/
bool to_eval; /**< A boolean indicating whether the column contains expressions that should be evaluated
* (typically OBI_STR columns containing character strings to be evaluated by Python).
*/
time_t creation_date; /**< Date of creation of the file.
*/
obiversion_t version; /**< Version of the column.
*/
obiversion_t cloned_from; /**< Version of the column from which this column
* was cloned from (-1 if it was not created by cloning
* another column).
*/
char name[OBIDMS_COLUMN_MAX_NAME+1]; /**< The column name as a NULL terminated string.
*/
char indexer_name[INDEXER_MAX_NAME+1]; /**< If there is one, the indexer name as a NULL terminated string.
*/
Column_reference_t associated_column; /**< If there is one, the reference to the associated column.
*/
bool finished; /**< A boolean indicating whether the column was properly closed by the view that created it.
*/
char comments[COMMENTS_MAX_LENGTH+1]; /**< Comments stored as a classical zero end C string.
*/
int64_t elements_names_length; /**< Length of the character array where the elements names are stored.
*/
char* elements_names; /**< Pointer in mem_arnea on the names of the line elements with '\0' as separator
* and '\0\0' as terminal flag.
* (default are the indices: "0\01\02\0...\0n\0\0").
*/
int64_t* elements_names_idx; /**< Pointer in mem_arena on the index for the start of each element name in elements_names.
*/
int64_t* sorted_elements_idx; /**< Index for the sorted element names in elements_names_idx.
*/
byte_t mem_arena[]; /**< Memory array where the elements names, the elements names index and the sorted elements index are stored.
*/
} OBIDMS_column_header_t, *OBIDMS_column_header_p;
/**
* @brief OBIDMS column structure.
*
* A data structure of this type is returned by the functions
* creating, opening or cloning an OBIDMS column.
*/
typedef struct OBIDMS_column {
OBIDMS_p dms; /**< A pointer to the OBIDMS structure to
* which the column belongs.
*/
OBIDMS_column_directory_p column_directory; /**< A pointer to the OBIDMS column directory
* structure to which the column belongs.
*/
OBIDMS_column_header_p header; /**< A pointer to the header of the column.
*/
Obi_indexer_p indexer; /**< A pointer to the blob indexer associated
* with the column if there is one.
*/
void* data; /**< A `void` pointer to the beginning of the data.
*
* @warning Never use this member directly outside
* of the code of the low level functions
* of the OBIDMS.
*/
bool writable; /**< Indicates if the column is writable or not.
* - `true` the column is writable
* - `false` the column is read-only
*
* A column is writable only by its creator
* until it closes it.
*/
size_t counter; /**< Indicates by how many threads/programs
* (TODO) the column is used.
*/
} OBIDMS_column_t, *OBIDMS_column_p;
/**
* @brief Function building the full path to the version file of a column in an OBIDMS.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param dms A pointer on the OBIDMS.
* @param column_name The name of the OBIDMS column file.
*
* @returns A pointer to the version file name.
* @retval NULL if an error occurred.
*
* @since October 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_version_file_full_path(OBIDMS_p dms, const char* column_name);
/**
* @brief Function building the full path to the version file of a column in an OBIDMS.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param dms A pointer on the OBIDMS.
* @param column_name The name of the OBIDMS column file.
* @param version_number The version number of the OBIDMS column file.
*
* @returns A pointer to the version file name.
* @retval NULL if an error occurred.
*
* @since October 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_column_full_path(OBIDMS_p dms, const char* column_name, obiversion_t version_number);
/**
* @brief Returns the latest version number of a column in a column directory using the column directory structure.
*
* @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory().
*
* @returns The latest version number kept in the version file.
* @retval -1 if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory);
/**
* @brief Returns the latest version of a column in a column directory using the column name.
*
* @param dms A pointer on an OBIDMS.
* @param column_name The column name.
*
* @returns The latest version number kept in the version file.
* @retval -1 if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name);
/**
* @brief Returns the header size in bytes of a column.
*
* The header size is rounded to a multiple of the memory page size.
*
* @param nb_elements_per_line The number of elements per line.
* @param elts_names_length The length of elements_names including the two terminal '\0's.
*
* @returns The header size in bytes.
*
* @since May 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t obi_calculate_header_size(index_t nb_elements_per_line, int64_t elts_names_length);
/**
* @brief Creates a column.
*
* The minimum data size allocated is one memory page, and the data is initialized to the NA value of the OBIType.
* If there is an indexer associated with the column, it is opened or created if it does not already exist.
*
* @warning If there is one element per line, elements_names should be equal to column_name. // TODO change this condition?
*
* @param dms A pointer on an OBIDMS.
* @param column_name The name of the new column.
* @param data_type The OBIType code of the data.
* @param nb_lines The number of lines to be stored (can be 0 if not known).
* @param nb_elements_per_line The number of elements per line.
* @param elements_names The names of the elements with ';' as separator (no terminal ';'),
* NULL or "" if the default names are to be used ("0\01\02\0...\0n").
* @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()).
* @param dict_column A boolean indicating whether the column should contain dictionary-like values.
* @param tuples A boolean indicating whether the column should contain indices referring to indexed tuples.
* @param to_eval A boolean indicating whether the column contains expressions that should be evaluated
* (typically OBI_STR columns containing character strings to be evaluated by Python).
* @param indexer_name The name of the indexer if there is one associated with the column.
* If NULL or "", the indexer name is set as the column name.
* @param associated_column_name The name of the associated column if there is one (otherwise NULL or "").
* @param associated_column_version The version of the associated column if there is one (otherwise -1).
* @param comments Optional comments associated with the column in JSON format (NULL, "" or "{}" if no comments associated).
*
* @returns A pointer on the newly created column structure.
* @retval NULL if an error occurred.
*
* @since May 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const char* column_name,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
char* elements_names,
bool elt_names_formatted,
bool dict_column,
bool tuples,
bool to_eval,
const char* indexer_name,
const char* associated_column_name,
obiversion_t associated_column_version,
const char* comments
);
/**
* @brief Opens a column in read-only mode.
*
* @param dms A pointer on an OBIDMS.
* @param column_name The name of the column.
* @param version_number The version of the column that should be opened (if -1, the latest version is retrieved).
*
* @returns A pointer on the opened column structure.
* @retval NULL if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_p obi_open_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number);
/**
* @brief Clones a column, and returns a pointer to the writable new column.
*
* @param dms A pointer on an OBIDMS.
* @param column_name The name of the column to clone.
* @param version_number The version of the column that should be cloned (if -1, the latest version is retrieved).
* @param clone_data Whether the data should be copied or not.
*
* @returns A pointer to the created column.
* @retval NULL if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, OBIDMS_column_p line_selection, const char* column_name, obiversion_t version_number, bool clone_data);
/**
* @brief Clones a column indexer to have it writable.
*
* @param column A pointer on an OBIDMS column.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since November 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_clone_column_indexer(OBIDMS_column_p column);
/**
* @brief Truncates a column to the number of lines used if it is not read-only and closes it.
*
* @warning This function does not flag the column as finished, only finish_view() in the obiview source file does that.
*
* @param column A pointer on an OBIDMS column.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_column(OBIDMS_column_p column);
/**
* @brief Truncates a column file to the number of lines used rounded to the nearest
* greater multiple of the page size.
*
* @param column A pointer on an OBIDMS column.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_truncate_column(OBIDMS_column_p column);
/**
* @brief Enlarges a column file.
*
* @param column A pointer on an OBIDMS column.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_enlarge_column(OBIDMS_column_p column);
/**
* @brief Writes comments to a column file.
*
* @warning This overwrites any other previous comments.
*
* @param column A pointer on an OBIDMS column.
* @param comments A character string containing the comments.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since August 2018
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_write_comments(OBIDMS_column_p column, const char* comments);
/**
* @brief Adds comments to a column file.
*
* This reads the comments in the JSON format and adds the key value pair.
* If the key already exists, the value format is turned to array and the new value is appended
* if it is not already in the array.
*
* @param column A pointer on an OBIDMS column.
* @param key The key.
* @param value The value associated with the key.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since August 2018
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_add_comment(OBIDMS_column_p column, const char* key, const char* value);
/*
* @brief Sets the data in a column to the specified value.
*
* @warning The specified value should be the atomic value effectively stored in the column (i.e. it can not be a character string for example).
*
* @param column A pointer on an OBIDMS column.
* @param start The first line number of the block that should be set.
* @param nb_lines The number of lines that should be set.
* @param value_p A pointer on the value to which the column should be set.
*
* @since May 2018
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void obi_set_column_to_value(OBIDMS_column_p column,
index_t first_line_nb,
index_t nb_lines,
void* value_p);
/*
* @brief Sets the data in a column to the NA value of the data OBIType.
*
* @param column A pointer on an OBIDMS column.
* @param start The first line number of the block that should be set.
* @param nb_lines The number of lines that should be set.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t nb_lines); // TODO make private?
/**
* @brief Recovers the header of an OBIDMS column from the column name.
*
* @warning The header structure has to be munmapped by the caller.
*
* @param dms A pointer on an OBIDMS.
* @param column_name The name of an OBIDMS column.
* @param version_number The version of the column from which the header should be
* retrieved (-1: latest version).
*
* @returns A pointer on the mmapped header of the column.
* @retval NULL if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number);
/**
* @brief Munmap a mmapped header as returned by obi_column_get_header_from_name().
*
* @param header A pointer on the mmapped header structure.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_header(OBIDMS_column_header_p header);
/**
* @brief Recovers the index of an element in an OBIDMS column from the element's name.
*
* @param column A pointer on an OBIDMS column.
* @param element_name The name of the element.
*
* @returns The index of the element in a line of the column.
* @retval OBIIdx_NA if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
/**
* @brief Recovers the elements names of the lines of a column, with ';' as separator (i.e. "0;1;2;...;n\0").
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on an OBIDMS column.
*
* @returns A pointer on a character array where the elements names are stored.
* @retval NULL if an error occurred.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_get_elements_names(OBIDMS_column_p column);
/**
* @brief Recovers the elements names of the lines of a column with a human readable format ("0; 1; 2; ...; n\0").
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on an OBIDMS column.
*
* @returns A pointer on a character array where the elements names are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_get_formatted_elements_names(OBIDMS_column_p column);
/**
* @brief Returns the informations of a column with a human readable format (data type, element names, comments).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on an OBIDMS column.
* @param detailed Whether the informations should contain column comments or just data type and element names.
*
* @returns A pointer on a character array where the formatted column informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed);
/**
* @brief Prepares a column to set a value.
*
* @param column A pointer on an OBIDMS column.
* @param line_nb The number of the line at which the value will be set.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, index_t elt_idx);
/**
* @brief Prepares a column to recover a value.
*
* @param column A pointer on an OBIDMS column.
* @param line_nb The number of the line at which the value will be recovered.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb);
/**
* @brief Goes through all the column files of a DMS and checks for views that have
* not been flagged as finished (done by the finish_view() function in the
* obiview source file).
*
* @param dms A pointer on an OBIDMS.
*
* @returns A boolean indicating whether unfinished views were found.
*
* @since September 2019
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_dms_has_unfinished_columns(OBIDMS_p dms);
/**
* @brief Goes through all the column files of a DMS and deletes columns that have
* not been flagged as finished (done by the finish_view() function in the
* obiview source file).
*
* @param dms A pointer on an OBIDMS.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since October 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_clean_unfinished_columns(OBIDMS_p dms);
#endif /* OBIDMSCOLUMN_H_ */