From 2698022aaf399e261e9e5d09c6217a8624e1a2ee Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Tue, 15 Sep 2015 17:09:31 +0200 Subject: [PATCH] New OBIDMS method to list the columns of an OBIDMS --- python/obitools3/obidms/capidms.pxd | 8 +- python/obitools3/obidms/capidms.pyx | 21 ++++- .../obidms/obidmscolumn/capidmscolumn.pxd | 3 + src/obidms.c | 48 ---------- src/obidms.h | 17 ---- src/obidmscolumn.c | 88 ++++++++++++++++++- src/obidmscolumn.h | 14 +++ 7 files changed, 130 insertions(+), 69 deletions(-) diff --git a/python/obitools3/obidms/capidms.pxd b/python/obitools3/obidms/capidms.pxd index 9b6fd49..aad4306 100644 --- a/python/obitools3/obidms/capidms.pxd +++ b/python/obitools3/obidms/capidms.pxd @@ -1,3 +1,4 @@ + cdef extern from *: ctypedef char* const_char_p "const char*" @@ -14,7 +15,10 @@ cdef extern from "obidms.h" nogil: OBIDMS_p obi_open_dms(const_char_p dms_name) OBIDMS_p obi_dms(const_char_p dms_name) int obi_close_dms(OBIDMS_p dms) - int obi_list_columns(OBIDMS_p dms) - + +from obitools3.obidms.obidmscolumn.capidmscolumn cimport * + + cdef class OBIDMS: cdef OBIDMS_p pointer + cdef str dms_name \ No newline at end of file diff --git a/python/obitools3/obidms/capidms.pyx b/python/obitools3/obidms/capidms.pyx index 700106d..d346c66 100644 --- a/python/obitools3/obidms/capidms.pyx +++ b/python/obitools3/obidms/capidms.pyx @@ -1,15 +1,34 @@ #cython: language_level=3 +from pathlib import Path + from .capidms cimport * + cdef class OBIDMS : def __init__(self, dms_name) : dms_name_b = dms_name.encode(encoding='UTF-8') + self.dms_name = dms_name self.pointer = obi_dms(dms_name_b) # def __del__(self) : # obi_close_dms(self.pointer) def list(self): - obi_list_columns(self.pointer) \ No newline at end of file + p = Path(self.dms_name+'.obidms') + #dms = {} + print("{:<25} {:<25} {:<25} {:<25}".format('-Column name-','-Data type-','-Latest version number-', '-Line count of latest version-')) + for entry in p.iterdir(): + if entry.suffix == ".obicol": + column_name = entry.stem + column_name_b = column_name.encode('utf-8') + #dms[column_name] = {} + data_type = (name_data_type(obi_column_get_data_type_from_name(self.pointer, column_name_b))).decode('utf-8') + latest_version = obi_column_get_latest_version_from_name(self.pointer, column_name_b) + line_count = obi_column_get_line_count_from_name(self.pointer, column_name_b) + #dms[column_name]['data_type'] = data_type + #dms[column_name]['latest_version'] = latest_version + #dms[column_name]['line_count'] = line_count + print("{:<25} {:<25} {:<25} {:<25}".format(column_name, data_type, latest_version, line_count)) + \ No newline at end of file diff --git a/python/obitools3/obidms/obidmscolumn/capidmscolumn.pxd b/python/obitools3/obidms/obidmscolumn/capidmscolumn.pxd index 5b242f9..2dcbc84 100644 --- a/python/obitools3/obidms/obidmscolumn/capidmscolumn.pxd +++ b/python/obitools3/obidms/obidmscolumn/capidmscolumn.pxd @@ -35,6 +35,9 @@ cdef extern from "obidmscolumn.h" nogil: OBIType_t obi_column_get_data_type_from_name(OBIDMS_p dms, const char* column_name) OBIDMS_column_p obi_clone_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number, bint clone_data) int obi_truncate_and_close_column(OBIDMS_column_p column) + obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name) + OBIType_t obi_column_get_data_type_from_name(OBIDMS_p dms, const char* column_name) + size_t obi_column_get_line_count_from_name(OBIDMS_p dms, const char* column_name) cdef class OBIDMS_column: diff --git a/src/obidms.c b/src/obidms.c index ed42f82..b025a44 100644 --- a/src/obidms.c +++ b/src/obidms.c @@ -216,54 +216,6 @@ OBIDMS_p obi_dms(const char* dms_name) } -int obi_list_columns(OBIDMS_p dms) -{ - DIR *d; - struct dirent *dir; - char* dir_name; - char* extension; - OBIType_t data_type; - obiversion_t latest_version; - - d = dms->directory; - - dir = readdir(d); - if (dir == NULL) - { - obidebug(1, "\nError reading in the OBIDMS directory"); - return -1; - } - - fprintf(stderr, "Column name\tData type\tLatest version"); - - while (dir != NULL) - { - dir_name = strdup(dir->d_name); - if (dir_name == NULL) - { - obidebug(1, "\nError strdup-ing a directory name"); - return -1; - } - - dir_name = strtok(dir_name, "."); - extension = strtok(NULL, "."); - - if ((extension != NULL) && (strcmp("obicol", extension) == 0)) - // Found a column directory - { - data_type = obi_column_get_data_type_from_name(dms, dir_name); - latest_version = obi_column_get_latest_version_from_name(dms, dir_name); - fprintf(stderr, "\n%s\t%d\t%d", dir_name, data_type, latest_version); - } - dir = readdir(d); - } - - rewinddir(d); - - return 0; -} - - int obi_close_dms(OBIDMS_p dms) { if (dms != NULL) diff --git a/src/obidms.h b/src/obidms.h index 74397e5..aa001df 100644 --- a/src/obidms.h +++ b/src/obidms.h @@ -126,23 +126,6 @@ OBIDMS_p obi_open_dms(const char *dms_name); OBIDMS_p obi_dms(const char *dms_name); -/** - * @brief Lists all the column directories in the OBIDMS, their data type and - * their latest version. - * - * @param dms a pointer as returned by obi_create_dms() or obi_open_dms(). - * - * @return an integer value indicating the success of the operation. - * - * @retvalue 0 on success - * @retvalue -1 on failure and the `obi_errno` variable is set. - * - * @since July 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -int obi_list_columns(OBIDMS_p dms); - - /** * @brief Closes an opened OBITools Data Management instance (OBIDMS). * diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c index e540663..53e2a65 100644 --- a/src/obidmscolumn.c +++ b/src/obidmscolumn.c @@ -1347,13 +1347,99 @@ OBIType_t obi_column_get_data_type_from_name(OBIDMS_p dms, const char* column_na } +size_t obi_column_get_line_count_from_name(OBIDMS_p dms, const char* column_name) +{ + OBIDMS_column_header_p header; + OBIDMS_column_directory_p column_directory; + char* column_file_name; + int column_file_descriptor; + int column_dir_file_descriptor; + size_t header_size; + size_t line_count; + obiversion_t version_number; + + // Get the column directory structure associated to the column + column_directory = obi_open_column_directory(dms, column_name); + if (column_directory == NULL) + { + obidebug(1, "\nError opening a column directory structure"); + return -1; + } + + // Get the file descriptor associated to the column directory + column_dir_file_descriptor = dirfd(column_directory->directory); + if (column_dir_file_descriptor < 0) + { + obi_set_errno(OBICOLDIR_UNKNOWN_ERROR); + obidebug(1, "\nError getting the file descriptor of a column directory"); + obi_close_column_directory(column_directory); + return -1; + } + + // Calculate the header size + header_size = obi_get_platform_header_size(); + + // Get the latest version number + version_number = obi_get_latest_version_number(column_directory); + if (version_number < 0) + { + obidebug(1, "\nError getting the latest version number in a column directory"); + return -1; + } + + // Get the column file name + column_file_name = build_column_file_name(column_name, version_number); + if (column_file_name == NULL) + { + return -1; + } + + // Open the column file (READ-ONLY) + column_file_descriptor = openat(column_dir_file_descriptor, column_file_name, O_RDONLY); + if (column_file_descriptor < 0) + { + obidebug(1, "\nError opening a column file"); + obi_set_errno(OBICOL_UNKNOWN_ERROR); + free(column_file_name); + return -1; + } + + // Fill the header structure + header = mmap(NULL, + header_size, + PROT_READ, + MAP_SHARED, + column_file_descriptor, + 0 + ); + + if (header == MAP_FAILED) + { + obi_set_errno(OBICOL_UNKNOWN_ERROR); + obidebug(1, "\nError mmapping the header of a column"); + close(column_file_descriptor); + free(column_file_name); + return -1; + } + + // Check endianness? + + line_count = header->line_count; + + free(column_file_name); + close(column_file_descriptor); + munmap(header, header_size); + return line_count; +} + + const char* obi_column_get_elements_names(OBIDMS_column_p column) { return (column->header)->elements_names; } -// to be rewritten in an optimized and safe way +// TODO to be rewritten in an optimized and safe way size_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name) { char* elements_names; diff --git a/src/obidmscolumn.h b/src/obidmscolumn.h index deb917e..63ff5ff 100644 --- a/src/obidmscolumn.h +++ b/src/obidmscolumn.h @@ -312,6 +312,20 @@ OBIType_t obi_column_get_data_type(OBIDMS_column_p column); OBIType_t obi_column_get_data_type_from_name(OBIDMS_p dms, const char* column_name); +/** + * @brief Recovers the line count of an OBIDMS column from the column name. + * + * @param dms a pointer on an OBIDMS + * @param column_name the name of an OBIDMS column + * + * @return the line count of the column + * + * @since September 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +size_t obi_column_get_line_count_from_name(OBIDMS_p dms, const char* column_name); + + /** * @brief Recovers the elements names of an OBIDMS column. *