diff --git a/python/obitools3/obidms/_obidms.cfiles b/python/obitools3/obidms/_obidms.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obidms.cfiles +++ b/python/obitools3/obidms/_obidms.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obidms.pxd b/python/obitools3/obidms/_obidms.pxd index ce3fef9..e334c6d 100644 --- a/python/obitools3/obidms/_obidms.pxd +++ b/python/obitools3/obidms/_obidms.pxd @@ -18,7 +18,6 @@ cdef class OBIDMS_column: cdef index_t nb_elements_per_line cdef list elements_names - cpdef update_pointer(self) cpdef list get_elements_names(self) cpdef str get_data_type(self) cpdef index_t get_nb_lines_used(self) @@ -52,6 +51,7 @@ cdef class OBIView: cpdef add_column(self, str column_name, obiversion_t version_number=*, + str alias=*, str type=*, index_t nb_lines=*, index_t nb_elements_per_line=*, @@ -62,6 +62,8 @@ cdef class OBIView: str comments=*, bint create=* ) + cpdef change_column_alias(self, str current_alias, str new_alias) + cpdef update_column_pointers(self) cpdef select_line(self, index_t line_nb) cpdef select_lines(self, list line_selection) cpdef save_and_close(self) diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx index 6437090..c5903da 100644 --- a/python/obitools3/obidms/_obidms.pyx +++ b/python/obitools3/obidms/_obidms.pyx @@ -55,7 +55,7 @@ from ._obidmscolumn_seq cimport OBIDMS_column_seq, \ from .capi.obiview cimport Obiview_p, \ Obiview_infos_p, \ - Column_reference_p, \ + Alias_column_pair_p, \ obi_new_view_nuc_seqs, \ obi_new_view, \ obi_new_view_cloned_from_name, \ @@ -65,7 +65,7 @@ from .capi.obiview cimport Obiview_p, \ obi_open_view, \ obi_view_delete_column, \ obi_view_add_column, \ - obi_view_get_column, \ + obi_view_create_column_alias, \ 
obi_view_get_column, \ obi_view_get_pointer_on_column_in_view, \ obi_select_line, \ @@ -124,9 +124,6 @@ cdef class OBIDMS_column : for line_nb in range(lines_used): yield self.get_line(line_nb) - cpdef update_pointer(self): - self.pointer = obi_view_get_pointer_on_column_in_view(self.view.pointer, str2bytes(self.column_name)) - cpdef list get_elements_names(self): return self.elements_names @@ -297,16 +294,16 @@ cdef class OBIView : for i in range(view.infos.column_count) : column_p = (view.columns)[i] header = (column_p).header - col_name = bytes2str(header.name) + col_name = bytes2str(view.infos.column_references[i].alias) subclass = OBIDMS_column.get_subclass_type(column_p) self.columns[col_name] = subclass(self, col_name) - + def __repr__(self) : cdef str s s = str(self.name) + "\n" + str(self.comments) + "\n" + str(self.pointer.infos.line_count) + " lines\n" for column_name in self.columns : - s = s + self.columns[column_name].__repr__() + '\n' + s = s + column_name + ": " + self.columns[column_name].__repr__() + '\n' return s @@ -317,15 +314,15 @@ cdef class OBIView : if obi_view_delete_column(self.pointer, str2bytes(column_name)) < 0 : raise Exception("Problem deleting a column from a view") - # Update the dictionaries of column pointers and column objects, and update pointers in column objects (make function?): + # Update the dictionary of column objects: (self.columns).pop(column_name) - for column_n in self.columns : - (self.columns[column_n]).update_pointer() + self.update_column_pointers() cpdef add_column(self, str column_name, obiversion_t version_number=-1, + str alias='', str type='', index_t nb_lines=0, index_t nb_elements_per_line=1, @@ -343,6 +340,11 @@ cdef class OBIView : cdef OBIDMS_column_p column_p column_name_b = str2bytes(column_name) + if alias == '' : + alias = column_name + alias_b = column_name_b + else : + alias_b = str2bytes(alias) if nb_elements_per_line > 1 : elements_names_b = str2bytes(';'.join(elements_names)) @@ -366,8 +368,8 
@@ cdef class OBIView : data_type = OBI_SEQ else : raise Exception("Invalid provided data type") - - if (obi_view_add_column(self.pointer, column_name_b, version_number, # TODO should return pointer on column? + + if (obi_view_add_column(self.pointer, column_name_b, version_number, alias_b, # TODO should return pointer on column? data_type, nb_lines, nb_elements_per_line, elements_names_b, str2bytes(indexer_name), str2bytes(associated_column_name), associated_column_version, @@ -375,12 +377,28 @@ cdef class OBIView : raise Exception("Problem adding a column in a view") # Get the column pointer - column_p = obi_view_get_column(self.pointer, column_name_b) + column_p = obi_view_get_column(self.pointer, alias_b) # Open and store the subclass subclass = OBIDMS_column.get_subclass_type(column_p) - (self.columns)[column_name] = subclass(self, column_name) - + (self.columns)[alias] = subclass(self, alias) + + + cpdef change_column_alias(self, str current_alias, str new_alias): + if (obi_view_create_column_alias(self.pointer, str2bytes(current_alias), str2bytes(new_alias)) < 0) : + raise Exception("Problem changing a column alias") + # Update the dictionaries of column column objects + self.columns[new_alias] = self.columns[current_alias] + (self.columns).pop(current_alias) + + + cpdef update_column_pointers(self): + cdef str column_n + cdef OBIDMS_column column + for column_n in self.columns : + column = self.columns[column_n] + column.pointer = obi_view_get_pointer_on_column_in_view(self.pointer, str2bytes(column_n)) + cpdef save_and_close(self) : if (obi_save_and_close_view(self.pointer) < 0) : @@ -488,7 +506,7 @@ cdef class OBIView_NUC_SEQS(OBIView): for i in range(view.infos.column_count) : column_p = (view.columns)[i] header = (column_p).header - col_name = bytes2str(header.name) + col_name = bytes2str(view.infos.column_references[i].alias) subclass = OBIDMS_column.get_subclass_type(column_p) self.columns[col_name] = subclass(self, col_name) @@ -548,7 +566,7 @@ cdef 
class OBIView_line : (((self.view).columns)[column_name]).set_line(self.index, value) def __contains__(self, str column_name): - return (column_name in self.view) + return (column_name in self.view.columns) def __repr__(self): cdef dict line @@ -618,7 +636,7 @@ cdef class OBIDMS : cdef Obiview_infos_p view_infos_p cdef dict view_infos_d - cdef Column_reference_p column_refs + cdef Alias_column_pair_p column_refs cdef int i, j cdef str column_name @@ -638,11 +656,12 @@ cdef class OBIDMS : view_infos_d["line_selection"]["column_name"] = bytes2str((view_infos_p.line_selection).column_name) view_infos_d["line_selection"]["version"] = (view_infos_p.line_selection).version view_infos_d["column_references"] = {} - column_refs = view_infos_p.column_references + column_references = view_infos_p.column_references for j in range(view_infos_d["column_count"]) : - column_name = bytes2str((column_refs[j]).column_name) + column_name = bytes2str((column_references[j]).alias) view_infos_d["column_references"][column_name] = {} - view_infos_d["column_references"][column_name]["version"] = column_refs[j].version + view_infos_d["column_references"][column_name]["original_name"] = bytes2str((column_references[j]).column_refs.column_name) + view_infos_d["column_references"][column_name]["version"] = (column_references[j]).column_refs.version obi_view_unmap_file(self.pointer, view_infos_p) diff --git a/python/obitools3/obidms/_obidmscolumn_bool.cfiles b/python/obitools3/obidms/_obidmscolumn_bool.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obidmscolumn_bool.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_bool.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obidmscolumn_char.cfiles b/python/obitools3/obidms/_obidmscolumn_char.cfiles index 
bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obidmscolumn_char.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_char.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obidmscolumn_float.cfiles b/python/obitools3/obidms/_obidmscolumn_float.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obidmscolumn_float.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_float.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obidmscolumn_int.cfiles b/python/obitools3/obidms/_obidmscolumn_int.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obidmscolumn_int.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_int.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obidmscolumn_qual.cfiles b/python/obitools3/obidms/_obidmscolumn_qual.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obidmscolumn_qual.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_qual.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obidmscolumn_seq.cfiles b/python/obitools3/obidms/_obidmscolumn_seq.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obidmscolumn_seq.cfiles +++ 
b/python/obitools3/obidms/_obidmscolumn_seq.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obidmscolumn_str.cfiles b/python/obitools3/obidms/_obidmscolumn_str.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obidmscolumn_str.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_str.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obiseq.cfiles b/python/obitools3/obidms/_obiseq.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obiseq.cfiles +++ b/python/obitools3/obidms/_obiseq.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/_obitaxo.cfiles b/python/obitools3/obidms/_obitaxo.cfiles index bf37301..c87f58d 100644 --- a/python/obitools3/obidms/_obitaxo.cfiles +++ b/python/obitools3/obidms/_obitaxo.cfiles @@ -8,6 +8,8 @@ ../../../src/dna_seq_indexer.c ../../../src/encode.h ../../../src/encode.c +../../../src/hashtable.h +../../../src/hashtable.c ../../../src/murmurhash2.h ../../../src/murmurhash2.c ../../../src/obi_align.h diff --git a/python/obitools3/obidms/capi/obiview.pxd b/python/obitools3/obidms/capi/obiview.pxd index b64b756..1c00241 100644 --- a/python/obitools3/obidms/capi/obiview.pxd +++ b/python/obitools3/obidms/capi/obiview.pxd @@ -26,6 +26,13 @@ cdef extern from "obiview.h" nogil: extern const_char_p QUALITY_COLUMN + struct Alias_column_pair_t : + Column_reference_t column_refs + 
const_char_p alias + + ctypedef Alias_column_pair_t* Alias_column_pair_p + + struct Obiview_infos_t : time_t creation_date const_char_p name @@ -35,7 +42,7 @@ cdef extern from "obiview.h" nogil: Column_reference_t line_selection index_t line_count int column_count - Column_reference_p column_references + Alias_column_pair_p column_references const_char_p comments ctypedef Obiview_infos_t* Obiview_infos_p @@ -48,7 +55,9 @@ cdef extern from "obiview.h" nogil: OBIDMS_column_p line_selection OBIDMS_column_p new_line_selection OBIDMS_column_p columns - + int nb_predicates + # TODO declarations for column dictionary and predicate function array? + ctypedef Obiview_t* Obiview_p @@ -69,6 +78,7 @@ cdef extern from "obiview.h" nogil: int obi_view_add_column(Obiview_p view, const_char_p column_name, obiversion_t version_number, + const_char_p alias, OBIType_t data_type, index_t nb_lines, index_t nb_elements_per_line, @@ -89,6 +99,8 @@ cdef extern from "obiview.h" nogil: OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name) + int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias) + int obi_save_view(Obiview_p view) int obi_close_view(Obiview_p view) diff --git a/src/hashtable.c b/src/hashtable.c new file mode 100644 index 0000000..356fa26 --- /dev/null +++ b/src/hashtable.c @@ -0,0 +1,197 @@ +/**************************************************************************** + * Hash table source file * + ****************************************************************************/ + +/** + * @file hashtable.c + * @author Celine Mercier + * @date July 26th 2016 + * @brief Source file for hash table functions. 
+ */
+
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "string.h"
+
+#include "murmurhash2.h"
+#include "hashtable.h"
+
+
+// Create a new hashtable with `size` bins, all initially empty.
+// Returns NULL if size is 0 or if an allocation fails.
+hashtable_p ht_create(size_t size)
+{
+    hashtable_p hashtable = NULL;
+    size_t      i;
+
+    // A table without bins cannot be indexed: the bin number is computed modulo the size
+    if (size == 0)
+        return NULL;
+
+    // Allocate the table
+    hashtable = malloc(sizeof(hashtable_t));
+    if (hashtable == NULL)
+        return NULL;
+
+    // Allocate the head nodes
+    hashtable->table = malloc(size * sizeof(entry_p));
+    if (hashtable->table == NULL)
+    {
+        free(hashtable);    // Do not leak the table structure
+        return NULL;
+    }
+
+    // Initialize the head nodes
+    for (i=0; i<size; i++)
+        hashtable->table[i] = NULL;
+
+    hashtable->size = size;
+
+    return hashtable;
+}
+
+
+// Create an entry holding a private copy of the key and the value pointer as given.
+// Returns NULL if the key is NULL or if an allocation fails.
+entry_p ht_new_entry(const char* key, void* value)
+{
+    entry_p new_entry;
+
+    if (key == NULL)    // strdup() must not be given a NULL pointer
+        return NULL;
+
+    new_entry = malloc(sizeof(entry_t));
+    if (new_entry == NULL)
+        return NULL;
+
+    new_entry->key = strdup(key);
+    if (new_entry->key == NULL)
+    {
+        free(new_entry);    // Do not leak the entry when the key copy fails
+        return NULL;
+    }
+
+    new_entry->value = value;
+    new_entry->next  = NULL;
+
+    return new_entry;
+}
+
+
+// Delete the entry stored under a key.
+// Returns 0 if the entry was deleted, -1 if the key was not found or an argument is NULL.
+// NOTE(review): the stored value is freed here, whereas ht_free() releases only the keys
+// and the entries; confirm which side owns the values before deleting entries whose
+// values are still referenced elsewhere.
+int ht_delete_entry(hashtable_p hashtable, const char* key)
+{
+    entry_p last  = NULL;
+    entry_p entry = NULL;
+    size_t  bin   = 0;
+
+    if ((hashtable == NULL) || (key == NULL))
+        return -1;
+
+    bin = murmurhash2(key, strlen(key), SEED);
+    bin = bin % hashtable->size;
+
+    // Step through the bin looking for the key, remembering the previous entry
+    entry = hashtable->table[bin];
+
+    while ((entry != NULL) && (strcmp(key, entry->key) != 0))
+    {
+        last = entry;
+        entry = entry->next;
+    }
+
+    if (entry == NULL)  // key not found
+        return -1;
+
+    // Link the entries before and after the entry
+    if (last != NULL)   // If not head node
+        last->next = entry->next;
+    else                // If head node
+        hashtable->table[bin] = entry->next;
+
+    // Free the entry
+    free(entry->key);
+    free(entry->value);
+    free(entry);
+
+    return 0;
+}
+
+
+// Set a new entry in the hash table.
+// If the key is already in the table, the value is replaced by the new one
+// Returns 0 if the entry was set, -1 if an argument is NULL or the new entry could not be allocated.
+int ht_set(hashtable_p hashtable, const char* key, void* value)
+{
+    size_t  bin       = 0;
+    entry_p new_entry = NULL;
+    entry_p next      = NULL;
+    entry_p last      = NULL;
+
+    if ((hashtable == NULL) || (key == NULL) || (value == NULL))
+        return -1;
+
+    bin = murmurhash2(key, strlen(key), SEED);
+    bin = bin % hashtable->size;
+
+    // Walk the bin until the key is found or the end of the list is reached
+    next = hashtable->table[bin];
+
+    while ((next != NULL) && (strcmp(key, next->key) != 0))
+    {
+        last = next;
+        next = next->next;
+    }
+
+    // If the key is already in the table, the value is replaced in the existing entry
+    // (the loop above only stops on a non-NULL entry when its key matches).
+    // The previous value is not freed here.
+    if (next != NULL)
+        next->value = value;
+
+    // Else, create the new entry and link it at the end of the list
+    else
+    {
+        // Create the new entry
+        new_entry = ht_new_entry(key, value);
+        if (new_entry == NULL)
+            return -1;
+
+        // If it is the first entry of that bin, we're at the head node of the list, and we replace it with the new entry
+        if (last == NULL)
+            hashtable->table[bin] = new_entry;
+
+        // Else link the new entry at the end of the list
+        else
+            last->next = new_entry;
+    }
+    return 0;
+}
+
+
+// Retrieve a value from a hash table.
+// Returns NULL if the key is not in the table or if an argument is NULL.
+void* ht_get(hashtable_p hashtable, const char* key)
+{
+    size_t  bin = 0;
+    entry_p entry;
+
+    if ((hashtable == NULL) || (key == NULL))
+        return NULL;
+
+    bin = murmurhash2(key, strlen(key), SEED);
+    bin = bin % hashtable->size;
+
+    // Step through the bin looking for the key
+    entry = hashtable->table[bin];
+
+    while ((entry != NULL) && (strcmp(key, entry->key) != 0))
+        entry = entry->next;
+
+    if (entry == NULL)
+        return NULL;
+
+    return entry->value;
+}
+
+
+// Free the hash table: every entry and its key, the bin array and the table structure.
+// The stored values are not freed. Does nothing if the table is NULL.
+void ht_free(hashtable_p hashtable)
+{
+    size_t  i;
+    entry_p entry;
+    entry_p next;
+
+    if (hashtable == NULL)
+        return;
+
+    for (i=0; i < hashtable->size; i++)
+    {
+        next = hashtable->table[i];
+        while (next != NULL)
+        {
+            entry = next;
+            next = entry->next;     // Read the link before releasing the entry
+            free(entry->key);
+            free(entry);
+        }
+    }
+    free(hashtable->table);
+    free(hashtable);
+}
+
+
diff --git a/src/hashtable.h b/src/hashtable.h
new file mode 100644
index
0000000..af1190a
--- /dev/null
+++ b/src/hashtable.h
@@ -0,0 +1,123 @@
+/****************************************************************************
+ * Hash table header file                                                   *
+ ****************************************************************************/
+
+/**
+ * @file hashtable.h
+ * @author Celine Mercier
+ * @date July 26th 2016
+ * @brief Header file for hash table functions.
+ */
+
+
+#ifndef HASHTABLE_H_
+#define HASHTABLE_H_
+
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define SEED (0x9747b28c)   /**< The seed used by the hash function.
+                             */
+
+
+/**
+ * @brief Structure for an entry.
+ *
+ * Entries falling in the same bin are chained through their `next` field.
+ */
+typedef struct entry_s {
+    char*           key;    /**< Key used to refer to the entry
+                             *   (a copy of the key string, owned by the entry).
+                             */
+    void*           value;  /**< Pointer on the value to be stored.
+                             */
+    struct entry_s* next;   /**< Pointer on the next entry in the bin.
+                             */
+} entry_t, *entry_p;
+
+
+/**
+ * @brief Structure for a hash table.
+ */
+typedef struct hashtable {
+    size_t   size;          /**< Number of bins in the table.
+                             */
+    entry_p* table;         /**< Table of bins.
+                             */
+} hashtable_t, *hashtable_p;
+
+
+/**
+ * @brief Creates a new hashtable.
+ *
+ * @param size The number of bins in the hash table.
+ *
+ * @returns A pointer to the newly created hash table.
+ * @retval NULL if an error occurred.
+ *
+ * @since July 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+hashtable_p ht_create(size_t size);
+
+
+/**
+ * @brief Inserts a new entry in the hash table.
+ * If the key is already in the table, the value is replaced by the new one.
+ *
+ * @warning A NULL key or a NULL value is rejected with -1.
+ *
+ * @param hashtable A pointer on the hash table structure.
+ * @param key The key.
+ * @param value A pointer on the value associated with the key.
+ *
+ * @retval 0 if the entry was correctly set.
+ * @retval -1 if an error occurred.
+ *
+ * @since July 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+int ht_set(hashtable_p hashtable, const char* key, void* value);
+
+
+/**
+ * @brief Retrieves a value from a hash table.
+ * + * @param hashtable A pointer on the hash table structure. + * @param key The key. + * + * @returns A pointer on the value associated with the key. + * @retval NULL if the key was not found. + * + * @since July 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +void* ht_get(hashtable_p hashtable, const char* key); + + +/** + * @brief Deletes an entry. + * + * @param hashtable A pointer on the hash table structure. + * @param key The key. + * + * @retval 0 if the entry was correctly deleted. + * @retval -1 if an error occurred. + * + * @since July 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int ht_delete_entry(hashtable_p hashtable, const char* key); + + +/** + * @brief Frees a hash table. + * + * @param hashtable A pointer on the hash table structure. + * + * @since July 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +void ht_free(hashtable_p hashtable); + + +#endif /* HASHTABLE_H_ */ + diff --git a/src/obiview.c b/src/obiview.c index 61cdca7..add3e42 100644 --- a/src/obiview.c +++ b/src/obiview.c @@ -30,6 +30,7 @@ #include "obierrno.h" #include "obidebug.h" #include "obilittlebigman.h" +#include "hashtable.h" #include "utils.h" @@ -42,6 +43,16 @@ * **************************************************************************/ +/** + * Internal function calculating the size of the file where the informations about an obiview are stored. + * + * @returns The size of the file in bytes. + * + * @since June 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +size_t get_platform_view_file_size(); + /** * Internal function building the file name where the informations about an obiview are stored. @@ -82,6 +93,14 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name); * The column references stored in the mapped view infos structures are updated * to match the columns opened in the opened view structure. 
* + * @warning The column pointer array should be up to date before using this function. + * @warning Aliases are not updated by this function and have to be edited separately. + * This function simply reads the column pointer array associated with the view + * and fills the column names and versions in the column reference array accordingly, + * without touching the alias. + * That means that for example if there is a shift in the column pointer array, this + * function should not be used. + * * @param view A pointer on the view. * * @since June 2016 @@ -90,6 +109,69 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name); void update_column_refs(Obiview_p view); +/** + * @brief Internal function creating the column dictionary associated with a view. + * + * The column dictionary is built from the column references array, and associates each column alias + * with the pointer on the column. + * + * @warning The column reference array and the column pointer array should be up to date before using this function. + * + * @param view A pointer on the view. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since July 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int create_column_dict(Obiview_p view); + + +/** + * @brief Internal function updating the column dictionary associated with a view. + * + * The column dictionary is built from the column references array, and associates each column alias + * with the pointer on the column. + * + * @warning The column reference array and the column pointer array should be up to date before using this function. + * + * @param view A pointer on the view. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. 
+ * + * @since July 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int update_column_dict(Obiview_p view); + + +/** + * @brief Internal function updating the column reference array and the column dictionary associated with a view. + * + * The column reference array is updated from the column pointer array, then the column dictionary that + * and associates each column alias with the pointer on the column is updated from the column reference array. + * + * @warning The column pointer array should be up to date before using this function. + * @warning Aliases are not updated by this function and have to be edited separately. + * This function simply reads the column pointer array associated with the view + * and fills the column names and versions in the column reference array accordingly, + * without touching the alias. + * That means that for example if there is a shift in the column pointer array, this + * function should not be used. + * + * @param view A pointer on the view. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since July 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int update_column_refs_and_dict(Obiview_p view); + + /** * @brief Internal function to update the line count in the context of a view. 
* @@ -368,19 +450,72 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name) } - void update_column_refs(Obiview_p view) { int i; for (i=0; i < (view->infos)->column_count; i++) { - strcpy((((view->infos)->column_references)+i)->column_name, (((view->columns)[i])->header)->name); - (((view->infos)->column_references)+i)->version = (((view->columns)[i])->header)->version; + strcpy(((((view->infos)->column_references)[i]).column_refs).column_name, (((view->columns)[i])->header)->name); + ((((view->infos)->column_references)[i]).column_refs).version = (((view->columns)[i])->header)->version; } } +int create_column_dict(Obiview_p view) +{ + int i; + + view->column_dict = ht_create(MAX_NB_OPENED_COLUMNS); + if (view->column_dict == NULL) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError creating a column dictionary"); + return -1; + } + + // Rebuild the dictionary from the column references and the column pointer array associated with the view + for (i=0; i < (view->infos)->column_count; i++) + { + // Check that each alias is unique + if (ht_get(view->column_dict, (((view->infos)->column_references)[i]).alias) != NULL) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError: the name/alias identifying a column in a view is not unique"); + return -1; + } + + if (ht_set(view->column_dict, (((view->infos)->column_references)[i]).alias, (view->columns)[i]) < 0) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError adding a column in a column dictionary"); + return -1; + } + } + + return 0; +} + + +int update_column_dict(Obiview_p view) +{ + // Re-initialize the dictionary to rebuild it from scratch + ht_free(view->column_dict); + + if (create_column_dict(view) < 0) + return -1; + + return 0; +} + + +int update_column_refs_and_dict(Obiview_p view) +{ + update_column_refs(view); + return update_column_dict(view); +} + + int update_lines(Obiview_p view, index_t line_count) { int i; @@ -408,6 +543,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p 
view, const char* column_name) OBIDMS_column_p current_line_selection = NULL; OBIDMS_column_p column = NULL; OBIDMS_column_p column_buffer; + bool found; // Check that the view is not read-only if (view->read_only) @@ -422,9 +558,10 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name) else current_line_selection = view->line_selection; + found = false; for (i=0; i<((view->infos)->column_count); i++) { - if ((current_line_selection != NULL) || (!(strcmp((((view->columns)[i])->header)->name, column_name)))) + if ((current_line_selection != NULL) || (!strcmp((((view->infos)->column_references)[i]).alias, column_name))) { // Clone with the right line selection and replace (for all columns if there is a line selection) // Save pointer to close column after cloning @@ -442,7 +579,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name) // Close old cloned column obi_close_column(column_buffer); - if (!(strcmp((((view->columns)[i])->header)->name, column_name))) + if (!strcmp((((view->infos)->column_references)[i]).alias, column_name)) { // Found the column to return column = (view->columns)[i]; } @@ -464,8 +601,8 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name) view->new_line_selection = NULL; } - // Update column references in view infos - update_column_refs(view); + // Update column refs and dict + update_column_refs_and_dict(view); return column; } @@ -473,7 +610,8 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name) int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p) { - char* column_name; + int i; + char* column_name = NULL; // Check that the view is not read-only if (view->read_only) @@ -491,22 +629,25 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i if (view->line_selection != NULL) (*line_nb_p) = *(((index_t*) ((view->line_selection)->data)) + (*line_nb_p)); - 
column_name = (char*) malloc(strlen(((*column_pp)->header)->name) * sizeof(char)); + // Get the name/alias of the column from the pointer + for (i=0; i<((view->infos)->column_count); i++) + { + if (obi_view_get_column(view, (((view->infos)->column_references)[i]).alias) == *column_pp) + column_name = (((view->infos)->column_references)[i]).alias; + } if (column_name == NULL) { - obi_set_errno(OBI_MALLOC_ERROR); - obidebug(1, "\nError trying to allocate memory for a column name"); + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError trying to clone a column in a view: column alias not found from pointer"); return -1; } - strcpy(column_name, ((*column_pp)->header)->name); + (*column_pp) = clone_column_in_view(view, column_name); if ((*column_pp) == NULL) { obidebug(1, "\nError trying to clone a column to modify it"); return -1; } - - free(column_name); } if (((*line_nb_p)+1) > (view->infos)->line_count) @@ -797,6 +938,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl } view->dms = dms; + view->read_only = 0; // Create view file if (create_obiview_file(dms, view_name) < 0) @@ -888,24 +1030,12 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl (view->infos)->line_count = (view_to_clone->infos)->line_count; } - for (i=0; i<((view_to_clone->infos)->column_count); i++) - { - (view->columns)[i] = obi_open_column(dms, (((view_to_clone->columns)[i])->header)->name, (((view_to_clone->columns)[i])->header)->version); - if ((view->columns)[i] == NULL) - { - if (view->line_selection != NULL) - obi_close_column(view->line_selection); - obi_view_unmap_file(view->dms, view->infos); - free(view); - return NULL; - } - } - - (view->infos)->column_count = (view_to_clone->infos)->column_count; + // Fill informations strcpy((view->infos)->view_type, (view_to_clone->infos)->view_type); strcpy((view->infos)->created_from, (view_to_clone->infos)->name); view->new_line_selection = NULL; } + // Else, fill empty view 
structure else { @@ -919,10 +1049,10 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl //view->columns = NULL; // TODO } + // Fill last informations strcpy((view->infos)->name, view_name); strcpy((view->infos)->comments, comments); (view->infos)->creation_date = time(NULL); - view->read_only = 0; view->nb_predicates = 0; view->predicate_functions = NULL; @@ -938,8 +1068,44 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl ((view->infos)->line_selection).version = ((view->line_selection)->header)->version; } - // Store references for columns - update_column_refs(view); + // Create the column dictionary (hash table) associating column names (or aliases) to column pointers + if (create_column_dict(view) < 0) + { + obi_close_view(view); + return NULL; + } + + // Once the view has been created with all its elements and informations, add the columns if the view is cloned from another view + // Add the columns from the view to clone in the new view + if (view_to_clone != NULL) + { + (view->infos)->column_count = 0; + for (i=0; i<((view_to_clone->infos)->column_count); i++) + { + if (obi_view_add_column(view, + (((view_to_clone->columns)[i])->header)->name, + (((view_to_clone->columns)[i])->header)->version, + (((view_to_clone->infos)->column_references)[i]).alias, + 0, + (view->infos)->line_count, + 0, + NULL, + NULL, + NULL, + -1, + NULL, + false) + < 0) + { + obidebug(1, "\nError adding a column in a new view from a view to clone"); + if (view->line_selection != NULL) + obi_close_column(view->line_selection); + obi_view_unmap_file(view->dms, view->infos); + free(view); + return NULL; + } + } + } return view; } @@ -985,26 +1151,26 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v if (view_to_clone == NULL) { // Adding sequence column - if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", NULL, -1, "Nucleotide sequences", true) < 
0) + if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NUC_SEQUENCE_COLUMN, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", NULL, -1, "Nucleotide sequences", true) < 0) { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; } // Adding id column - if (obi_view_add_column(view, ID_COLUMN, -1, OBI_STR, 0, 1, ID_COLUMN, "", NULL, -1, "Ids", true) < 0) + if (obi_view_add_column(view, ID_COLUMN, -1, ID_COLUMN, OBI_STR, 0, 1, ID_COLUMN, "", NULL, -1, "Ids", true) < 0) { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; } // Adding definition column - if (obi_view_add_column(view, DEFINITION_COLUMN, -1, OBI_STR, 0, 1, DEFINITION_COLUMN, "", NULL, -1, "Definitions", true) < 0) + if (obi_view_add_column(view, DEFINITION_COLUMN, -1, DEFINITION_COLUMN, OBI_STR, 0, 1, DEFINITION_COLUMN, "", NULL, -1, "Definitions", true) < 0) { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; } // Adding quality column associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN); - if (obi_view_add_column(view, QUALITY_COLUMN, -1, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association + if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; @@ -1131,8 +1297,11 @@ int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos) Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name) { - Obiview_p view; - int i; + Obiview_p view; + const char* column_name; + obiversion_t column_version; + OBIDMS_column_p column_pointer; + int i; // 
Alllocate the memory for the view structure view = (Obiview_p) malloc(sizeof(Obiview_t)); @@ -1164,13 +1333,18 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name) // Open the columns to read for (i=0; i < ((view->infos)->column_count); i++) { - (view->columns)[i] = obi_open_column(dms, (((view->infos)->column_references)+i)->column_name, (((view->infos)->column_references)+i)->version); - if ((view->columns)[i] == NULL) + column_name = ((((view->infos)->column_references)[i]).column_refs).column_name; + column_version = ((((view->infos)->column_references)[i]).column_refs).version; + + column_pointer = obi_open_column(dms, column_name, column_version); + if (column_pointer == NULL) { - obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, (((view->infos)->column_references)+i)->column_name, (((view->infos)->column_references)+i)->version); + obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, column_name, column_version); obi_close_view(view); return NULL; } + (view->columns)[i] = column_pointer; + } view->dms = dms; @@ -1179,6 +1353,14 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name) view->nb_predicates = 0; view->predicate_functions = NULL; + // Create the column dictionary associating each column alias with its pointer + if (create_column_dict(view) < 0) + { + obidebug(1, "\nError creating the column dictionary when opening a view"); + obi_close_view(view); + return NULL; + } + return view; } @@ -1186,6 +1368,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name) int obi_view_add_column(Obiview_p view, const char* column_name, obiversion_t version_number, + const char* alias, OBIType_t data_type, index_t nb_lines, index_t nb_elements_per_line, @@ -1196,10 +1379,10 @@ int obi_view_add_column(Obiview_p view, const char* comments, bool create) // all infos for creation or open { - int i; - OBIDMS_column_p column; - OBIDMS_column_p column_buffer; - OBIDMS_column_p 
current_line_selection; + int i; + OBIDMS_column_p column; + OBIDMS_column_p column_buffer; + OBIDMS_column_p current_line_selection; // Check that the view is not read-only if (view->read_only) @@ -1275,14 +1458,29 @@ int obi_view_add_column(Obiview_p view, return -1; } - // Store column in the view + // Store column pointer in the view structure (view->columns)[(view->infos)->column_count] = column; + + // If an alias is not defined, it's the original name of the column. // TODO discuss + if (alias == NULL) + alias = column_name; + + // Save column alias + strcpy((((view->infos)->column_references)[(view->infos)->column_count]).alias, alias); + (view->infos)->column_count++; if ((view->infos)->column_count == 1) // first column in the view - (view->infos)->line_count = (column->header)->lines_used; + (view->infos)->line_count = nb_lines; - // Update reference in view infos - update_column_refs(view); + // Update column references and dictionary + update_column_refs_and_dict(view); + +// // Print dict +// for (i=0; i<((view->infos)->column_count); i++) +// { +// fprintf(stderr, "\n\nalias: %s", (((view->infos)->column_references)[i]).alias); +// fprintf(stderr, "\npointer: %x\n", obi_view_get_column(view, (((view->infos)->column_references)[i]).alias)); +// } return 0; } @@ -1303,59 +1501,107 @@ int obi_view_delete_column(Obiview_p view, const char* column_name) return -1; } - found = 0; - + found = false; for (i=0; i<((view->infos)->column_count); i++) { - if (!strcmp((((view->columns)[i])->header)->name, column_name)) + if ((!found) && (!strcmp((((view->infos)->column_references)[i]).alias, column_name))) { obi_close_column((view->columns)[i]); - found = 1; + found = true; } if (found) { if (i != (((view->infos)->column_count) - 1)) // not the last one + { // Shift the pointer and the references (view->columns)[i] = (view->columns)[i+1]; + strcpy((((view->infos)->column_references)[i]).alias, (((view->infos)->column_references)[i+1]).alias); + 
strcpy(((((view->infos)->column_references)[i]).column_refs).column_name, ((((view->infos)->column_references)[i+1]).column_refs).column_name); + ((((view->infos)->column_references)[i]).column_refs).version = ((((view->infos)->column_references)[i+1]).column_refs).version; + } else // Last column (view->columns)[i] = NULL; } } if (!found) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError trying to delete a column: column not found"); return -1; + } ((view->infos)->column_count)--; - // Update reference in view infos - update_column_refs(view); + // Update column dictionary + update_column_dict(view); return 0; } OBIDMS_column_p obi_view_get_column(Obiview_p view, const char* column_name) +{ + return (OBIDMS_column_p)(ht_get(view->column_dict, column_name)); +} + + +OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const char* column_name) { int i; - for (i=0; i<((view->infos)->column_count); i++) + for (i=0; i < (view->infos)->column_count; i++) { - if (!(strcmp((((view->columns)[i])->header)->name, column_name))) - return (view->columns)[i]; + if (strcmp((((view->infos)->column_references)[i]).alias, column_name) == 0) + return ((view->columns)+i); } + obidebug(1, "\nError: column not found"); return NULL; } -OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const char* column_name) // TODO delete? 
+int obi_view_create_column_alias(Obiview_p view, const char* current_name, const char* alias) { int i; + bool found; + // Check that the view is not read-only + if (view->read_only) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError trying to change a column alias in a read-only view"); + return -1; + } + + // Check that the new alias is unique + if (ht_get(view->column_dict, alias) != NULL) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError: the new name/alias identifying a column in a view is not unique"); + return -1; + } + + // Set the new alias in the column references + found = false; for (i=0; i<((view->infos)->column_count); i++) { - if (!(strcmp((((view->columns)[i])->header)->name, column_name))) - return ((view->columns)+i); + if (!strcmp((((view->infos)->column_references)[i]).alias, current_name)) + { + strcpy((((view->infos)->column_references)[i]).alias, alias); + found = true; + } } - return NULL; + + if (found == false) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError: can't find the column '%s' to change its alias", current_name); + return -1; + } + + // Update the column dictionary + update_column_dict(view); + + return 0; } @@ -1503,6 +1749,9 @@ int obi_close_view(Obiview_p view) } } + // Free the column dictionary + ht_free(view->column_dict); + // Unmap view file if (obi_view_unmap_file(view->dms, view->infos) < 0) { diff --git a/src/obiview.h b/src/obiview.h index 4cb27eb..e3d7a20 100644 --- a/src/obiview.h +++ b/src/obiview.h @@ -25,6 +25,7 @@ #include "obidms.h" #include "obidmscolumn.h" #include "obierrno.h" +#include "hashtable.h" #define OBIVIEW_NAME_MAX_LENGTH (1000) /**< The maximum length of an OBIDMS view name. @@ -54,6 +55,18 @@ */ +/** + * @brief Structure for column aliases. + * Column aliases are alternative names used to identify a column in the context of a view. + */ +typedef struct Alias_column_pair { + Column_reference_t column_refs; /**< References (name and version) of the column. 
+ */ + char alias[OBIDMS_COLUMN_MAX_NAME+1]; /**< Alias of the column in the context of a view. + */ +} Alias_column_pair_t, *Alias_column_pair_p; + + /** * @brief Structure for a closed view stored in the view file. * Views are identified by their name. @@ -77,8 +90,8 @@ typedef struct Obiview_infos { */ int column_count; /**< The number of columns in the view. */ - Column_reference_t column_references[MAX_NB_OPENED_COLUMNS]; /**< References (name and version) for all the columns in the view. - */ + Alias_column_pair_t column_references[MAX_NB_OPENED_COLUMNS]; /**< References (name, version and alias) for all the columns in the view. + */ char comments[OBIVIEW_COMMENTS_MAX_LENGTH+1]; /**< Comments, additional informations on the view. */ } Obiview_infos_t, *Obiview_infos_p; @@ -88,27 +101,30 @@ typedef struct Obiview_infos { * @brief Structure for an opened view. */ typedef struct Obiview { - Obiview_infos_p infos; /**< A pointer on the mapped view informations. - */ - OBIDMS_p dms; /**< A pointer on the DMS to which the view belongs. - */ - bool read_only; /**< Whether the view is read-only or can be modified. + Obiview_infos_p infos; /**< A pointer on the mapped view informations. + */ + OBIDMS_p dms; /**< A pointer on the DMS to which the view belongs. + */ + bool read_only; /**< Whether the view is read-only or can be modified. */ - OBIDMS_column_p line_selection; /**< A pointer on the column containing the line selection + OBIDMS_column_p line_selection; /**< A pointer on the column containing the line selection * associated with the view if there is one. * This line selection is read-only, and when a line from the view is read, * it is this line selection that is used. */ - OBIDMS_column_p new_line_selection; /**< A pointer on the column containing the new line selection being built + OBIDMS_column_p new_line_selection; /**< A pointer on the column containing the new line selection being built * to associate with the view, if there is one. 
* When a line is selected with obi_select_line() or obi_select_lines(), * it is recorded in this line selection. */ - OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS]; /**< Array of pointers on all the columns of the view. + OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS]; /**< Array of pointers on all the columns of the view. */ - int nb_predicates; /**< Number of predicates to test when closing the view. + hashtable_p column_dict; /**< Hash table storing the pairs of column names or aliases with the associated + * column pointers. + */ + int nb_predicates; /**< Number of predicates to test when closing the view. */ - char* (**predicate_functions)(struct Obiview* view); /**< Array of pointers on all predicate functions to test when closing the view. + char* (**predicate_functions)(struct Obiview* view); /**< Array of pointers on all predicate functions to test when closing the view. */ } Obiview_t, *Obiview_p; @@ -275,6 +291,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name); * @param view A pointer on the view. * @param column_name The name of the column. * @param version_number The version of the column if it should be opened and not created (if -1, the latest version is retrieved). + * @param alias The unique name used to identify the column in the context of this view. * @param data_type The OBIType code of the data. * @param nb_lines The number of lines to be stored. * @param nb_elements_per_line The number of elements per line. 
@@ -293,6 +310,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name); int obi_view_add_column(Obiview_p view, const char* column_name, obiversion_t version_number, + const char* alias, OBIType_t data_type, index_t nb_lines, index_t nb_elements_per_line, @@ -354,6 +372,27 @@ OBIDMS_column_p obi_view_get_column(Obiview_p view, const char* column_name); OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const char* column_name); +/** + * @brief Changes the name that identifies a column in the context of a view. + * + * In the context of a view, each column is identified by a name that is unique in this view. + * + * @warning The view must be writable. + * + * @param view A pointer on the view. + * @param current_name The current name that identifies the column in this view. + * @param alias The new name that should be used to identify the column in this view. + * + * @returns A value indicating the success of the operation. + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since July 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_view_create_column_alias(Obiview_p view, const char* current_name, const char* alias); + + /** * @brief Selects a line in the context of a view. * @@ -450,22 +489,7 @@ int obi_close_view(Obiview_p view); int obi_save_and_close_view(Obiview_p view); -/** - * @brief Closes the structure containing all the informations written in the view file. - * - * @param views A pointer on the view informations structure. - * - * @returns A value indicating the success of the operation. - * @retval 0 if the operation was successfully completed. - * @retval -1 if an error occurred. - * - * @since February 2016 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -int obi_close_view_infos(Obiviews_infos_all_p views); - - -// TODO in following functions would it be better to use column names instead of column pointers? 
+// in following functions would it be better to use column names instead of column pointers? // check if it would be a gain or loss of time /**