Major update: Column aliases. Columns are now identified in the context

of a view by an alias that can be modified.
This commit is contained in:
Celine Mercier
2016-08-01 18:25:30 +02:00
parent 3843485a04
commit 312f50ff0f
17 changed files with 760 additions and 114 deletions

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -18,7 +18,6 @@ cdef class OBIDMS_column:
cdef index_t nb_elements_per_line
cdef list elements_names
cpdef update_pointer(self)
cpdef list get_elements_names(self)
cpdef str get_data_type(self)
cpdef index_t get_nb_lines_used(self)
@ -52,6 +51,7 @@ cdef class OBIView:
cpdef add_column(self,
str column_name,
obiversion_t version_number=*,
str alias=*,
str type=*,
index_t nb_lines=*,
index_t nb_elements_per_line=*,
@ -62,6 +62,8 @@ cdef class OBIView:
str comments=*,
bint create=*
)
cpdef change_column_alias(self, str current_alias, str new_alias)
cpdef update_column_pointers(self)
cpdef select_line(self, index_t line_nb)
cpdef select_lines(self, list line_selection)
cpdef save_and_close(self)

View File

@ -55,7 +55,7 @@ from ._obidmscolumn_seq cimport OBIDMS_column_seq, \
from .capi.obiview cimport Obiview_p, \
Obiview_infos_p, \
Column_reference_p, \
Alias_column_pair_p, \
obi_new_view_nuc_seqs, \
obi_new_view, \
obi_new_view_cloned_from_name, \
@ -65,7 +65,7 @@ from .capi.obiview cimport Obiview_p, \
obi_open_view, \
obi_view_delete_column, \
obi_view_add_column, \
obi_view_get_column, \
obi_view_create_column_alias, \
obi_view_get_column, \
obi_view_get_pointer_on_column_in_view, \
obi_select_line, \
@ -124,9 +124,6 @@ cdef class OBIDMS_column :
for line_nb in range(lines_used):
yield self.get_line(line_nb)
cpdef update_pointer(self):
self.pointer = <OBIDMS_column_p*> obi_view_get_pointer_on_column_in_view(self.view.pointer, str2bytes(self.column_name))
cpdef list get_elements_names(self):
return self.elements_names
@ -297,7 +294,7 @@ cdef class OBIView :
for i in range(view.infos.column_count) :
column_p = <OBIDMS_column_p> (view.columns)[i]
header = (column_p).header
col_name = bytes2str(header.name)
col_name = bytes2str(view.infos.column_references[i].alias)
subclass = OBIDMS_column.get_subclass_type(column_p)
self.columns[col_name] = subclass(self, col_name)
@ -306,7 +303,7 @@ cdef class OBIView :
cdef str s
s = str(self.name) + "\n" + str(self.comments) + "\n" + str(self.pointer.infos.line_count) + " lines\n"
for column_name in self.columns :
s = s + self.columns[column_name].__repr__() + '\n'
s = s + column_name + ": " + self.columns[column_name].__repr__() + '\n'
return s
@ -317,15 +314,15 @@ cdef class OBIView :
if obi_view_delete_column(self.pointer, str2bytes(column_name)) < 0 :
raise Exception("Problem deleting a column from a view")
# Update the dictionaries of column pointers and column objects, and update pointers in column objects (make function?):
# Update the dictionary of column objects:
(self.columns).pop(column_name)
for column_n in self.columns :
(self.columns[column_n]).update_pointer()
self.update_column_pointers()
cpdef add_column(self,
str column_name,
obiversion_t version_number=-1,
str alias='',
str type='',
index_t nb_lines=0,
index_t nb_elements_per_line=1,
@ -343,6 +340,11 @@ cdef class OBIView :
cdef OBIDMS_column_p column_p
column_name_b = str2bytes(column_name)
if alias == '' :
alias = column_name
alias_b = column_name_b
else :
alias_b = str2bytes(alias)
if nb_elements_per_line > 1 :
elements_names_b = str2bytes(';'.join(elements_names))
@ -367,7 +369,7 @@ cdef class OBIView :
else :
raise Exception("Invalid provided data type")
if (obi_view_add_column(self.pointer, column_name_b, version_number, # TODO should return pointer on column?
if (obi_view_add_column(self.pointer, column_name_b, version_number, alias_b, # TODO should return pointer on column?
data_type, nb_lines, nb_elements_per_line,
elements_names_b, str2bytes(indexer_name),
str2bytes(associated_column_name), associated_column_version,
@ -375,11 +377,27 @@ cdef class OBIView :
raise Exception("Problem adding a column in a view")
# Get the column pointer
column_p = obi_view_get_column(self.pointer, column_name_b)
column_p = obi_view_get_column(self.pointer, alias_b)
# Open and store the subclass
subclass = OBIDMS_column.get_subclass_type(column_p)
(self.columns)[column_name] = subclass(self, column_name)
(self.columns)[alias] = subclass(self, alias)
cpdef change_column_alias(self, str current_alias, str new_alias):
    """Change the alias identifying a column in this view.

    The alias is first changed in the underlying C view; the dictionary of
    column objects is then re-keyed so that the column object is reachable
    through ``new_alias`` instead of ``current_alias``.

    Raises:
        Exception: if the C layer reports an error while creating the alias.
        KeyError: if ``current_alias`` is not a key of ``self.columns``.
    """
    if obi_view_create_column_alias(self.pointer, str2bytes(current_alias), str2bytes(new_alias)) < 0:
        raise Exception("Problem changing a column alias")
    # Update the dictionary of column objects.
    # The entry is popped *before* being stored again: assigning first and
    # popping afterwards would drop the column object altogether whenever
    # new_alias == current_alias.
    self.columns[new_alias] = (self.columns).pop(current_alias)
cpdef update_column_pointers(self):
    """Refresh the C column pointer held by every column object of the view.

    Each column object is looked up again in the C view using the key (alias)
    under which it is stored in ``self.columns``.
    """
    cdef str alias
    cdef OBIDMS_column column_object
    for alias, column_object in (self.columns).items():
        column_object.pointer = <OBIDMS_column_p*> obi_view_get_pointer_on_column_in_view(self.pointer, str2bytes(alias))
cpdef save_and_close(self) :
@ -488,7 +506,7 @@ cdef class OBIView_NUC_SEQS(OBIView):
for i in range(view.infos.column_count) :
column_p = <OBIDMS_column_p> (view.columns)[i]
header = (column_p).header
col_name = bytes2str(header.name)
col_name = bytes2str(view.infos.column_references[i].alias)
subclass = OBIDMS_column.get_subclass_type(column_p)
self.columns[col_name] = subclass(self, col_name)
@ -548,7 +566,7 @@ cdef class OBIView_line :
(((self.view).columns)[column_name]).set_line(self.index, value)
def __contains__(self, str column_name):
return (column_name in self.view)
return (column_name in self.view.columns)
def __repr__(self):
cdef dict line
@ -618,7 +636,7 @@ cdef class OBIDMS :
cdef Obiview_infos_p view_infos_p
cdef dict view_infos_d
cdef Column_reference_p column_refs
cdef Alias_column_pair_p column_refs
cdef int i, j
cdef str column_name
@ -638,11 +656,12 @@ cdef class OBIDMS :
view_infos_d["line_selection"]["column_name"] = bytes2str((view_infos_p.line_selection).column_name)
view_infos_d["line_selection"]["version"] = <int> (view_infos_p.line_selection).version
view_infos_d["column_references"] = {}
column_refs = view_infos_p.column_references
column_references = view_infos_p.column_references
for j in range(view_infos_d["column_count"]) :
column_name = bytes2str((column_refs[j]).column_name)
column_name = bytes2str((column_references[j]).alias)
view_infos_d["column_references"][column_name] = {}
view_infos_d["column_references"][column_name]["version"] = column_refs[j].version
view_infos_d["column_references"][column_name]["original_name"] = bytes2str((column_references[j]).column_refs.column_name)
view_infos_d["column_references"][column_name]["version"] = (column_references[j]).column_refs.version
obi_view_unmap_file(self.pointer, view_infos_p)

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h

View File

@ -26,6 +26,13 @@ cdef extern from "obiview.h" nogil:
extern const_char_p QUALITY_COLUMN
struct Alias_column_pair_t :
Column_reference_t column_refs
const_char_p alias
ctypedef Alias_column_pair_t* Alias_column_pair_p
struct Obiview_infos_t :
time_t creation_date
const_char_p name
@ -35,7 +42,7 @@ cdef extern from "obiview.h" nogil:
Column_reference_t line_selection
index_t line_count
int column_count
Column_reference_p column_references
Alias_column_pair_p column_references
const_char_p comments
ctypedef Obiview_infos_t* Obiview_infos_p
@ -48,6 +55,8 @@ cdef extern from "obiview.h" nogil:
OBIDMS_column_p line_selection
OBIDMS_column_p new_line_selection
OBIDMS_column_p columns
int nb_predicates
# TODO declarations for column dictionary and predicate function array?
ctypedef Obiview_t* Obiview_p
@ -69,6 +78,7 @@ cdef extern from "obiview.h" nogil:
int obi_view_add_column(Obiview_p view,
const_char_p column_name,
obiversion_t version_number,
const_char_p alias,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
@ -89,6 +99,8 @@ cdef extern from "obiview.h" nogil:
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
int obi_save_view(Obiview_p view)
int obi_close_view(Obiview_p view)

197
src/hashtable.c Normal file
View File

@ -0,0 +1,197 @@
/****************************************************************************
* Hash table source file *
****************************************************************************/
/**
* @file hashtable.c
* @author Celine Mercier
* @date July 26th 2016
* @brief Source file for hash table functions.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include "string.h"
#include "murmurhash2.h"
#include "hashtable.h"
// Create a new hashtable
//
// size: number of bins of the table; must be greater than 0 because the bin
//       of a key is computed modulo the size.
//
// Returns a pointer on the new table with all its bins empty (NULL),
// or NULL if size is 0, if the bin array size would overflow, or if an
// allocation failed. Nothing is leaked on failure.
hashtable_p ht_create(size_t size)
{
    hashtable_p hashtable = NULL;
    size_t      i;

    // Reject sizes that would lead to a modulo by zero or to an overflowing allocation size
    if ((size == 0) || (size > (SIZE_MAX / sizeof(entry_p))))
        return NULL;

    // Allocate the table
    hashtable = malloc(sizeof(hashtable_t));
    if (hashtable == NULL)
        return NULL;

    // Allocate the head nodes
    hashtable->table = malloc(size * sizeof(entry_p));
    if (hashtable->table == NULL)
    {
        // Do not leak the table structure when the bin array can not be allocated
        free(hashtable);
        return NULL;
    }

    // Initialize the head nodes
    for (i=0; i<size; i++)
        hashtable->table[i] = NULL;

    hashtable->size = size;

    return hashtable;
}
// Create an entry
//
// key:   the key of the entry; it is duplicated, the entry owns its copy.
// value: pointer stored as is in the entry (not copied).
//
// Returns the new entry, not linked to any other entry (next is NULL),
// or NULL if key is NULL or if an allocation failed.
entry_p ht_new_entry(const char* key, void* value)
{
    entry_p new_entry;

    // strdup() must not be called on a NULL pointer
    if (key == NULL)
        return NULL;

    new_entry = malloc(sizeof(entry_t));
    if (new_entry == NULL)
        return NULL;

    new_entry->key = strdup(key);
    if (new_entry->key == NULL)
    {
        // Do not leak the entry when the key can not be duplicated
        free(new_entry);
        return NULL;
    }

    new_entry->value = value;
    new_entry->next = NULL;

    return new_entry;
}
// Delete an entry
//
// The entry associated with the key is unlinked from its bin, then its key,
// its value and the entry itself are freed.
//
// NOTE(review): the value is freed here whereas ht_free() leaves the values
// untouched; callers storing pointers they still own (or pointers that were
// not obtained from malloc) should confirm that this is what they want.
//
// Returns 0 if the entry was deleted, -1 if the table or the key is NULL
// or if the key was not found.
int ht_delete_entry(hashtable_p hashtable, const char* key)
{
    entry_p last = NULL;
    entry_p entry = NULL;
    size_t  bin = 0;

    if ((hashtable == NULL) || (key == NULL))
        return -1;

    bin = murmurhash2(key, strlen(key), SEED);
    bin = bin % hashtable->size;

    // Step through the bin looking for the value
    entry = hashtable->table[bin];
    while ((entry != NULL) && (strcmp(key, entry->key) != 0))
    {
        last = entry;
        entry = entry->next;
    }

    if (entry == NULL)	// key not found
        return -1;

    // Link the entries before and after the entry
    if (last != NULL)	// If not head node
        last->next = entry->next;
    else				// If head node
        hashtable->table[bin] = entry->next;

    // Free the entry
    free(entry->key);
    free(entry->value);
    free(entry);

    return 0;
}
// Set a new entry in the hash table. If the key is already in the table, the value is replaced by the new one
//
// hashtable: the table to modify.
// key:       the key; it is duplicated when a new entry is created.
// value:     pointer to store; NULL is refused because ht_get() returns NULL
//            for a key that is not in the table.
//
// Returns 0 if the entry was set, -1 if an argument is NULL or if the new
// entry could not be allocated.
int ht_set(hashtable_p hashtable, const char* key, void* value)
{
    size_t  bin = 0;
    entry_p new_entry = NULL;
    entry_p current = NULL;
    entry_p last = NULL;

    if ((hashtable == NULL) || (key == NULL) || (value == NULL))
        return -1;

    bin = murmurhash2(key, strlen(key), SEED);
    bin = bin % hashtable->size;

    // Step through the bin, remembering the last visited entry to be able to link after it
    current = hashtable->table[bin];
    while ((current != NULL) && (strcmp(key, current->key) != 0))
    {
        last = current;
        current = current->next;
    }

    // If the key is already in the table, the value is replaced in the entry that was found.
    // The previous value pointer is only overwritten, it is not freed.
    if (current != NULL)
    {
        current->value = value;
        return 0;
    }

    // Else, create the new entry and link it at the end of the list
    new_entry = ht_new_entry(key, value);
    if (new_entry == NULL)
        return -1;

    // If it is the first entry of that bin, we're at the head node of the list, and we replace it with the new entry
    if (last == NULL)
        hashtable->table[bin] = new_entry;
    // Else link the new entry at the end of the list
    else
        last->next = new_entry;

    return 0;
}
// Retrieve a value from a hash table
//
// Hashes the key to select a bin, then walks the chained entries of that bin
// until an entry with the same key is met.
// Returns the value pointer stored for the key, or NULL if the key is not in the table.
void* ht_get(hashtable_p hashtable, const char* key)
{
    size_t  slot;
    entry_p node;

    slot = murmurhash2(key, strlen(key), SEED);
    slot %= hashtable->size;

    // Walk the chain of the selected bin
    for (node = hashtable->table[slot]; node != NULL; node = node->next)
    {
        if (strcmp(key, node->key) == 0)
            return node->value;
    }

    // End of the chain reached without meeting the key
    return NULL;
}
// Free the hash table
//
// Frees every entry (the entry structure and its duplicated key), the bin
// array and the table structure itself. The values stored in the entries are
// not freed: they are left to the caller.
// Does nothing when called with a NULL pointer, like free().
void ht_free(hashtable_p hashtable)
{
    size_t  i;
    entry_p entry;
    entry_p next;

    if (hashtable == NULL)
        return;

    if (hashtable->table != NULL)
    {
        for (i=0; i < hashtable->size; i++)
        {
            next = hashtable->table[i];
            while (next != NULL)
            {
                entry = next;
                // Read the link before freeing the entry holding it
                next = entry->next;
                free(entry->key);
                free(entry);
            }
        }
    }

    free(hashtable->table);
    free(hashtable);
}

123
src/hashtable.h Normal file
View File

@ -0,0 +1,123 @@
/****************************************************************************
* Hash table header file *
****************************************************************************/
/**
* @file hashtable.h
* @author Celine Mercier
* @date July 26th 2016
* @brief Header file for hash table functions.
*/
#ifndef HASHTABLE_H_
#define HASHTABLE_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#define SEED (0x9747b28c) /**< The seed used by the hash function.
*/
/**
* @brief Structure for an entry.
*
* Entries whose keys fall in the same bin are chained through the `next` field.
*/
typedef struct entry_s {
char* key; /**< Key used to refer to the entry.
* It is a copy of the key given when the entry is created,
* freed together with the entry.
*/
void* value; /**< Pointer on the value to be stored.
* The pointer is stored as is (the value is not copied).
* It is freed by ht_delete_entry() but not by ht_free().
*/
struct entry_s* next; /**< Pointer on the next entry in the bin.
* NULL for the last entry of the bin.
*/
} entry_t, *entry_p;
/**
* @brief Structure for a hash table.
*
* Collisions are handled by chaining: each bin is the head of a linked list of entries.
*/
typedef struct hashtable {
size_t size; /**< Number of bins in the table.
* Set when the table is created by ht_create().
*/
entry_p* table; /**< Table of bins.
* Each element is the first entry of the bin, or NULL if the bin is empty.
*/
} hashtable_t, *hashtable_p;
/**
* @brief Creates a new hashtable.
*
* @param size The number of bins in the hash table.
*
* @returns A pointer to the newly created hash table.
* @retval NULL if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
hashtable_p ht_create(size_t size);
/**
* @brief Inserts a new entry in the hash table.
* If the key is already in the table, the value is replaced by the new one.
*
* @param hashtable A pointer on the hash table structure.
* @param key The key.
* @param value A pointer on the value associated with the key.
*
* @retval 0 if the entry was correctly set.
* @retval -1 if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int ht_set(hashtable_p hashtable, const char* key, void* value);
/**
* @brief Retrieves a value from a hash table.
*
* @param hashtable A pointer on the hash table structure.
* @param key The key.
*
* @returns A pointer on the value associated with the key.
* @retval NULL if the key was not found.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void* ht_get(hashtable_p hashtable, const char* key);
/**
* @brief Deletes an entry.
*
* The entry is removed from its bin, then its key, its value and the entry itself are freed.
*
* @warning The value associated with the key is freed by this function, whereas ht_free()
*          does not free the values.
*
* @param hashtable A pointer on the hash table structure.
* @param key The key.
*
* @retval 0 if the entry was correctly deleted.
* @retval -1 if an error occurred, including when the key was not found.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int ht_delete_entry(hashtable_p hashtable, const char* key);
/**
* @brief Frees a hash table.
*
* All the entries and their keys are freed, as well as the table of bins and the
* hash table structure itself.
*
* @warning The values stored in the entries are not freed by this function.
*
* @param hashtable A pointer on the hash table structure.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void ht_free(hashtable_p hashtable);
#endif /* HASHTABLE_H_ */

View File

@ -30,6 +30,7 @@
#include "obierrno.h"
#include "obidebug.h"
#include "obilittlebigman.h"
#include "hashtable.h"
#include "utils.h"
@ -42,6 +43,16 @@
*
**************************************************************************/
/**
* Internal function calculating the size of the file where the informations about an obiview are stored.
*
* @returns The size of the file in bytes.
*
* @since June 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t get_platform_view_file_size();
/**
* Internal function building the file name where the informations about an obiview are stored.
@ -82,6 +93,14 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name);
* The column references stored in the mapped view infos structures are updated
* to match the columns opened in the opened view structure.
*
* @warning The column pointer array should be up to date before using this function.
* @warning Aliases are not updated by this function and have to be edited separately.
* This function simply reads the column pointer array associated with the view
* and fills the column names and versions in the column reference array accordingly,
* without touching the alias.
* That means that for example if there is a shift in the column pointer array, this
* function should not be used.
*
* @param view A pointer on the view.
*
* @since June 2016
@ -90,6 +109,69 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name);
void update_column_refs(Obiview_p view);
/**
* @brief Internal function creating the column dictionary associated with a view.
*
* The column dictionary is built from the column references array, and associates each column alias
* with the pointer on the column.
*
* @warning The column reference array and the column pointer array should be up to date before using this function.
*
* @param view A pointer on the view.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int create_column_dict(Obiview_p view);
/**
* @brief Internal function updating the column dictionary associated with a view.
*
* The column dictionary is built from the column references array, and associates each column alias
* with the pointer on the column.
*
* @warning The column reference array and the column pointer array should be up to date before using this function.
*
* @param view A pointer on the view.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int update_column_dict(Obiview_p view);
/**
* @brief Internal function updating the column reference array and the column dictionary associated with a view.
*
* The column reference array is updated from the column pointer array, then the column dictionary,
* which associates each column alias with the pointer on the column, is updated from the column reference array.
*
* @warning The column pointer array should be up to date before using this function.
* @warning Aliases are not updated by this function and have to be edited separately.
* This function simply reads the column pointer array associated with the view
* and fills the column names and versions in the column reference array accordingly,
* without touching the alias.
* That means that for example if there is a shift in the column pointer array, this
* function should not be used.
*
* @param view A pointer on the view.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int update_column_refs_and_dict(Obiview_p view);
/**
* @brief Internal function to update the line count in the context of a view.
*
@ -368,19 +450,72 @@ int create_obiview_file(OBIDMS_p dms, const char* view_name)
}
// Copies, for each column of the view, the name and the version read in the column header
// into the entry with the same index in the column reference array of the view infos.
// The aliases stored in the column reference array are not written by this function.
void update_column_refs(Obiview_p view)
{
int i;
for (i=0; i < (view->infos)->column_count; i++)
{
strcpy((((view->infos)->column_references)+i)->column_name, (((view->columns)[i])->header)->name);
(((view->infos)->column_references)+i)->version = (((view->columns)[i])->header)->version;
strcpy(((((view->infos)->column_references)[i]).column_refs).column_name, (((view->columns)[i])->header)->name);
((((view->infos)->column_references)[i]).column_refs).version = (((view->columns)[i])->header)->version;
}
}
// Builds the column dictionary of a view: a new hash table is created and stored in the view,
// then each alias of the column reference array is associated with the column pointer found
// at the same index in the column pointer array.
// Returns 0 on success, -1 if the hash table could not be created, if an alias appears twice
// or if an entry could not be added.
int create_column_dict(Obiview_p view)
{
    int         col_idx;
    int         col_count;
    const char* alias;

    view->column_dict = ht_create(MAX_NB_OPENED_COLUMNS);
    if (view->column_dict == NULL)
    {
        obi_set_errno(OBIVIEW_ERROR);
        obidebug(1, "\nError creating a column dictionary");
        return -1;
    }

    // Fill the dictionary from the column references and the column pointer array of the view
    col_count = (view->infos)->column_count;
    for (col_idx = 0; col_idx < col_count; col_idx++)
    {
        alias = (((view->infos)->column_references)[col_idx]).alias;

        // An alias already present in the dictionary means it is not unique in the view
        if (ht_get(view->column_dict, alias) != NULL)
        {
            obi_set_errno(OBIVIEW_ERROR);
            obidebug(1, "\nError: the name/alias identifying a column in a view is not unique");
            return -1;
        }

        if (ht_set(view->column_dict, alias, (view->columns)[col_idx]) < 0)
        {
            obi_set_errno(OBIVIEW_ERROR);
            obidebug(1, "\nError adding a column in a column dictionary");
            return -1;
        }
    }

    return 0;
}
// Rebuilds the column dictionary of a view from scratch: the current dictionary is freed,
// then a new one is created from the column references and column pointers of the view.
// Returns 0 on success, -1 if the new dictionary could not be created.
int update_column_dict(Obiview_p view)
{
    // Drop the current dictionary before building the new one
    ht_free(view->column_dict);

    return (create_column_dict(view) < 0) ? -1 : 0;
}
// Updates the column references of a view, then rebuilds its column dictionary.
// Returns the value returned by update_column_dict().
int update_column_refs_and_dict(Obiview_p view)
{
    int status;

    // The references are updated first because the dictionary is rebuilt after them
    update_column_refs(view);
    status = update_column_dict(view);

    return status;
}
int update_lines(Obiview_p view, index_t line_count)
{
int i;
@ -408,6 +543,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
OBIDMS_column_p current_line_selection = NULL;
OBIDMS_column_p column = NULL;
OBIDMS_column_p column_buffer;
bool found;
// Check that the view is not read-only
if (view->read_only)
@ -422,9 +558,10 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
else
current_line_selection = view->line_selection;
found = false;
for (i=0; i<((view->infos)->column_count); i++)
{
if ((current_line_selection != NULL) || (!(strcmp((((view->columns)[i])->header)->name, column_name))))
if ((current_line_selection != NULL) || (!strcmp((((view->infos)->column_references)[i]).alias, column_name)))
{ // Clone with the right line selection and replace (for all columns if there is a line selection)
// Save pointer to close column after cloning
@ -442,7 +579,7 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
// Close old cloned column
obi_close_column(column_buffer);
if (!(strcmp((((view->columns)[i])->header)->name, column_name)))
if (!strcmp((((view->infos)->column_references)[i]).alias, column_name))
{ // Found the column to return
column = (view->columns)[i];
}
@ -464,8 +601,8 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
view->new_line_selection = NULL;
}
// Update column references in view infos
update_column_refs(view);
// Update column refs and dict
update_column_refs_and_dict(view);
return column;
}
@ -473,7 +610,8 @@ OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, index_t* line_nb_p)
{
char* column_name;
int i;
char* column_name = NULL;
// Check that the view is not read-only
if (view->read_only)
@ -491,22 +629,25 @@ int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* column_pp, i
if (view->line_selection != NULL)
(*line_nb_p) = *(((index_t*) ((view->line_selection)->data)) + (*line_nb_p));
column_name = (char*) malloc(strlen(((*column_pp)->header)->name) * sizeof(char));
// Get the name/alias of the column from the pointer
for (i=0; i<((view->infos)->column_count); i++)
{
if (obi_view_get_column(view, (((view->infos)->column_references)[i]).alias) == *column_pp)
column_name = (((view->infos)->column_references)[i]).alias;
}
if (column_name == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError trying to allocate memory for a column name");
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError trying to clone a column in a view: column alias not found from pointer");
return -1;
}
strcpy(column_name, ((*column_pp)->header)->name);
(*column_pp) = clone_column_in_view(view, column_name);
if ((*column_pp) == NULL)
{
obidebug(1, "\nError trying to clone a column to modify it");
return -1;
}
free(column_name);
}
if (((*line_nb_p)+1) > (view->infos)->line_count)
@ -797,6 +938,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
}
view->dms = dms;
view->read_only = 0;
// Create view file
if (create_obiview_file(dms, view_name) < 0)
@ -888,24 +1030,12 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
(view->infos)->line_count = (view_to_clone->infos)->line_count;
}
for (i=0; i<((view_to_clone->infos)->column_count); i++)
{
(view->columns)[i] = obi_open_column(dms, (((view_to_clone->columns)[i])->header)->name, (((view_to_clone->columns)[i])->header)->version);
if ((view->columns)[i] == NULL)
{
if (view->line_selection != NULL)
obi_close_column(view->line_selection);
obi_view_unmap_file(view->dms, view->infos);
free(view);
return NULL;
}
}
(view->infos)->column_count = (view_to_clone->infos)->column_count;
// Fill informations
strcpy((view->infos)->view_type, (view_to_clone->infos)->view_type);
strcpy((view->infos)->created_from, (view_to_clone->infos)->name);
view->new_line_selection = NULL;
}
// Else, fill empty view structure
else
{
@ -919,10 +1049,10 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
//view->columns = NULL; // TODO
}
// Fill last informations
strcpy((view->infos)->name, view_name);
strcpy((view->infos)->comments, comments);
(view->infos)->creation_date = time(NULL);
view->read_only = 0;
view->nb_predicates = 0;
view->predicate_functions = NULL;
@ -938,8 +1068,44 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl
((view->infos)->line_selection).version = ((view->line_selection)->header)->version;
}
// Store references for columns
update_column_refs(view);
// Create the column dictionary (hash table) associating column names (or aliases) to column pointers
if (create_column_dict(view) < 0)
{
obi_close_view(view);
return NULL;
}
// Once the view has been created with all its elements and informations, add the columns if the view is cloned from another view
// Add the columns from the view to clone in the new view
if (view_to_clone != NULL)
{
(view->infos)->column_count = 0;
for (i=0; i<((view_to_clone->infos)->column_count); i++)
{
if (obi_view_add_column(view,
(((view_to_clone->columns)[i])->header)->name,
(((view_to_clone->columns)[i])->header)->version,
(((view_to_clone->infos)->column_references)[i]).alias,
0,
(view->infos)->line_count,
0,
NULL,
NULL,
NULL,
-1,
NULL,
false)
< 0)
{
obidebug(1, "\nError adding a column in a new view from a view to clone");
if (view->line_selection != NULL)
obi_close_column(view->line_selection);
obi_view_unmap_file(view->dms, view->infos);
free(view);
return NULL;
}
}
}
return view;
}
@ -985,26 +1151,26 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
if (view_to_clone == NULL)
{
// Adding sequence column
if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", NULL, -1, "Nucleotide sequences", true) < 0)
if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NUC_SEQUENCE_COLUMN, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, "", NULL, -1, "Nucleotide sequences", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Adding id column
if (obi_view_add_column(view, ID_COLUMN, -1, OBI_STR, 0, 1, ID_COLUMN, "", NULL, -1, "Ids", true) < 0)
if (obi_view_add_column(view, ID_COLUMN, -1, ID_COLUMN, OBI_STR, 0, 1, ID_COLUMN, "", NULL, -1, "Ids", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Adding definition column
if (obi_view_add_column(view, DEFINITION_COLUMN, -1, OBI_STR, 0, 1, DEFINITION_COLUMN, "", NULL, -1, "Definitions", true) < 0)
if (obi_view_add_column(view, DEFINITION_COLUMN, -1, DEFINITION_COLUMN, OBI_STR, 0, 1, DEFINITION_COLUMN, "", NULL, -1, "Definitions", true) < 0)
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Adding quality column
associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN);
if (obi_view_add_column(view, QUALITY_COLUMN, -1, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
@ -1131,8 +1297,11 @@ int obi_view_unmap_file(OBIDMS_p dms, Obiview_infos_p view_infos)
Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
{
Obiview_p view;
int i;
Obiview_p view;
const char* column_name;
obiversion_t column_version;
OBIDMS_column_p column_pointer;
int i;
// Allocate the memory for the view structure
view = (Obiview_p) malloc(sizeof(Obiview_t));
@ -1164,13 +1333,18 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
// Open the columns to read
for (i=0; i < ((view->infos)->column_count); i++)
{
(view->columns)[i] = obi_open_column(dms, (((view->infos)->column_references)+i)->column_name, (((view->infos)->column_references)+i)->version);
if ((view->columns)[i] == NULL)
column_name = ((((view->infos)->column_references)[i]).column_refs).column_name;
column_version = ((((view->infos)->column_references)[i]).column_refs).version;
column_pointer = obi_open_column(dms, column_name, column_version);
if (column_pointer == NULL)
{
obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, (((view->infos)->column_references)+i)->column_name, (((view->infos)->column_references)+i)->version);
obidebug(1, "\nError opening a column for a view: column %d: %s, version %d", i, column_name, column_version);
obi_close_view(view);
return NULL;
}
(view->columns)[i] = column_pointer;
}
view->dms = dms;
@ -1179,6 +1353,14 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
view->nb_predicates = 0;
view->predicate_functions = NULL;
// Create the column dictionary associating each column alias with its pointer
if (create_column_dict(view) < 0)
{
obidebug(1, "\nError creating the column dictionary when opening a view");
obi_close_view(view);
return NULL;
}
return view;
}
@ -1186,6 +1368,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
// Adds a column to a view that is not read-only. The column pointer is stored in the
// next free slot of view->columns, the alias identifying the column in this view is
// saved in the matching column reference (defaulting to column_name when alias is
// NULL), and the column references and dictionary of the view are then updated.
// Returns 0 on success and -1 on the error path visible in this excerpt.
int obi_view_add_column(Obiview_p view,
const char* column_name,
obiversion_t version_number,
const char* alias,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
@ -1196,10 +1379,10 @@ int obi_view_add_column(Obiview_p view,
const char* comments,
bool create) // all infos for creation or open
{
int i;
OBIDMS_column_p column;
OBIDMS_column_p column_buffer;
OBIDMS_column_p current_line_selection;
int i;
OBIDMS_column_p column;
OBIDMS_column_p column_buffer;
OBIDMS_column_p current_line_selection;
// Check that the view is not read-only
if (view->read_only)
@ -1275,14 +1458,29 @@ int obi_view_add_column(Obiview_p view,
return -1;
}
// Store column in the view
// Store column pointer in the view structure
(view->columns)[(view->infos)->column_count] = column;
// If an alias is not defined, it's the original name of the column. // TODO discuss
if (alias == NULL)
alias = column_name;
// Save column alias
// NOTE(review): strcpy() does not bound the copy and the destination holds
// OBIDMS_COLUMN_MAX_NAME+1 bytes; no length check on alias is visible here, confirm
// that it is validated before this point.
// NOTE(review): unlike obi_view_create_column_alias(), no check that the alias is
// unique in the view is visible in this part of the function; confirm.
strcpy((((view->infos)->column_references)[(view->infos)->column_count]).alias, alias);
(view->infos)->column_count++;
if ((view->infos)->column_count == 1) // first column in the view
(view->infos)->line_count = (column->header)->lines_used;
(view->infos)->line_count = nb_lines;
// Update reference in view infos
update_column_refs(view);
// Update column references and dictionary
// NOTE(review): the result of this call is not checked; confirm whether the helper
// can fail (create_column_dict() reports errors in obi_open_view()).
update_column_refs_and_dict(view);
// // Print dict
// for (i=0; i<((view->infos)->column_count); i++)
// {
// fprintf(stderr, "\n\nalias: %s", (((view->infos)->column_references)[i]).alias);
// fprintf(stderr, "\npointer: %x\n", obi_view_get_column(view, (((view->infos)->column_references)[i]).alias));
// }
return 0;
}
@ -1303,59 +1501,107 @@ int obi_view_delete_column(Obiview_p view, const char* column_name)
return -1;
}
found = 0;
found = false;
for (i=0; i<((view->infos)->column_count); i++)
{
if (!strcmp((((view->columns)[i])->header)->name, column_name))
if ((!found) && (!strcmp((((view->infos)->column_references)[i]).alias, column_name)))
{
obi_close_column((view->columns)[i]);
found = 1;
found = true;
}
if (found)
{
if (i != (((view->infos)->column_count) - 1)) // not the last one
{ // Shift the pointer and the references
(view->columns)[i] = (view->columns)[i+1];
strcpy((((view->infos)->column_references)[i]).alias, (((view->infos)->column_references)[i+1]).alias);
strcpy(((((view->infos)->column_references)[i]).column_refs).column_name, ((((view->infos)->column_references)[i+1]).column_refs).column_name);
((((view->infos)->column_references)[i]).column_refs).version = ((((view->infos)->column_references)[i+1]).column_refs).version;
}
else // Last column
(view->columns)[i] = NULL;
}
}
if (!found)
{
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError trying to delete a column: column not found");
return -1;
}
((view->infos)->column_count)--;
// Update reference in view infos
update_column_refs(view);
// Update column dictionary
update_column_dict(view);
return 0;
}
// Retrieves a column of a view from the name (alias) identifying it in that view.
// The name is looked up in the column dictionary of the view and the stored value
// is returned as a column pointer.
// NOTE(review): presumably NULL when the name is not in the dictionary (this is how
// obi_view_create_column_alias() interprets ht_get()); confirm against hashtable.h.
OBIDMS_column_p obi_view_get_column(Obiview_p view, const char* column_name)
{
    OBIDMS_column_p column;

    column = (OBIDMS_column_p) ht_get(view->column_dict, column_name);

    return column;
}
// Returns the address of the slot of view->columns holding the column identified by
// column_name in the view, or NULL when no column matches.
// The column references of the view are scanned in order and compared by alias.
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const char* column_name)
{
int i;
for (i=0; i<((view->infos)->column_count); i++)
for (i=0; i < (view->infos)->column_count; i++)
{
if (!(strcmp((((view->columns)[i])->header)->name, column_name)))
return (view->columns)[i];
if (strcmp((((view->infos)->column_references)[i]).alias, column_name) == 0)
// Address of the slot in the array, not the column pointer stored in it
return ((view->columns)+i);
}
obidebug(1, "\nError: column not found");
return NULL;
}
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const char* column_name) // TODO delete?
// Changes the alias identifying a column in a view.
// Fails (returns -1 after setting the OBIVIEW_ERROR errno) when the view is read-only,
// when the new alias is already in the column dictionary, or when no column reference
// carries current_name as its alias. On success the alias is overwritten in the column
// references, the column dictionary is updated, and 0 is returned.
int obi_view_create_column_alias(Obiview_p view, const char* current_name, const char* alias)
{
int i;
bool found;
// Check that the view is not read-only
if (view->read_only)
{
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError trying to change a column alias in a read-only view");
return -1;
}
// Check that the new alias is unique
if (ht_get(view->column_dict, alias) != NULL)
{
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError: the new name/alias identifying a column in a view is not unique");
return -1;
}
// Set the new alias in the column references
// (the scan does not stop at the first match; aliases are expected to be unique in a view)
found = false;
for (i=0; i<((view->infos)->column_count); i++)
{
if (!(strcmp((((view->columns)[i])->header)->name, column_name)))
return ((view->columns)+i);
if (!strcmp((((view->infos)->column_references)[i]).alias, current_name))
{
// NOTE(review): strcpy() does not bound the copy and the destination holds
// OBIDMS_COLUMN_MAX_NAME+1 bytes; confirm that the length of alias is validated.
strcpy((((view->infos)->column_references)[i]).alias, alias);
found = true;
}
}
return NULL;
if (found == false)
{
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError: can't find the column '%s' to change its alias", current_name);
return -1;
}
// Update the column dictionary
// NOTE(review): the result of update_column_dict() is not checked; confirm whether it can fail.
update_column_dict(view);
return 0;
}
@ -1503,6 +1749,9 @@ int obi_close_view(Obiview_p view)
}
}
// Free the column dictionary
ht_free(view->column_dict);
// Unmap view file
if (obi_view_unmap_file(view->dms, view->infos) < 0)
{

View File

@ -25,6 +25,7 @@
#include "obidms.h"
#include "obidmscolumn.h"
#include "obierrno.h"
#include "hashtable.h"
#define OBIVIEW_NAME_MAX_LENGTH (1000) /**< The maximum length of an OBIDMS view name.
@ -54,6 +55,18 @@
*/
/**
* @brief Structure associating a column reference with the alias of that column in a view.
* Column aliases are alternative names used to identify a column in the context of a view.
* One such pair is kept for each column of a view in the view informations structure.
*/
typedef struct Alias_column_pair {
Column_reference_t column_refs; /**< References (name and version) of the column.
*/
char alias[OBIDMS_COLUMN_MAX_NAME+1]; /**< Alias of the column in the context of a view
* (at most OBIDMS_COLUMN_MAX_NAME characters plus the terminating null character).
*/
} Alias_column_pair_t, *Alias_column_pair_p;
/**
* @brief Structure for a closed view stored in the view file.
* Views are identified by their name.
@ -77,8 +90,8 @@ typedef struct Obiview_infos {
*/
int column_count; /**< The number of columns in the view.
*/
Column_reference_t column_references[MAX_NB_OPENED_COLUMNS]; /**< References (name and version) for all the columns in the view.
*/
Alias_column_pair_t column_references[MAX_NB_OPENED_COLUMNS]; /**< References (name, version and alias) for all the columns in the view.
*/
char comments[OBIVIEW_COMMENTS_MAX_LENGTH+1]; /**< Comments, additional informations on the view.
*/
} Obiview_infos_t, *Obiview_infos_p;
@ -88,27 +101,30 @@ typedef struct Obiview_infos {
* @brief Structure for an opened view.
*/
typedef struct Obiview {
Obiview_infos_p infos; /**< A pointer on the mapped view information.
*/
OBIDMS_p dms; /**< A pointer on the DMS to which the view belongs.
*/
bool read_only; /**< Whether the view is read-only or can be modified.
Obiview_infos_p infos; /**< A pointer on the mapped view information.
*/
OBIDMS_p dms; /**< A pointer on the DMS to which the view belongs.
*/
bool read_only; /**< Whether the view is read-only or can be modified.
*/
OBIDMS_column_p line_selection; /**< A pointer on the column containing the line selection
OBIDMS_column_p line_selection; /**< A pointer on the column containing the line selection
* associated with the view if there is one.
* This line selection is read-only, and when a line from the view is read,
* it is this line selection that is used.
*/
OBIDMS_column_p new_line_selection; /**< A pointer on the column containing the new line selection being built
OBIDMS_column_p new_line_selection; /**< A pointer on the column containing the new line selection being built
* to associate with the view, if there is one.
* When a line is selected with obi_select_line() or obi_select_lines(),
* it is recorded in this line selection.
*/
OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS]; /**< Array of pointers on all the columns of the view.
OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS]; /**< Array of pointers on all the columns of the view.
*/
int nb_predicates; /**< Number of predicates to test when closing the view.
hashtable_p column_dict; /**< Hash table associating each column name or alias used in the view
* with the pointer on the corresponding column.
*/
int nb_predicates; /**< Number of predicates to test when closing the view.
*/
char* (**predicate_functions)(struct Obiview* view); /**< Array of pointers on all predicate functions to test when closing the view.
char* (**predicate_functions)(struct Obiview* view); /**< Array of pointers on all predicate functions to test when closing the view.
*/
} Obiview_t, *Obiview_p;
@ -275,6 +291,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
* @param view A pointer on the view.
* @param column_name The name of the column.
* @param version_number The version of the column if it should be opened and not created (if -1, the latest version is retrieved).
* @param alias The unique name used to identify the column in the context of this view.
* @param data_type The OBIType code of the data.
* @param nb_lines The number of lines to be stored.
* @param nb_elements_per_line The number of elements per line.
@ -293,6 +310,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
int obi_view_add_column(Obiview_p view,
const char* column_name,
obiversion_t version_number,
const char* alias,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
@ -354,6 +372,27 @@ OBIDMS_column_p obi_view_get_column(Obiview_p view, const char* column_name);
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const char* column_name);
/**
* @brief Changes the name that identifies a column in the context of a view.
*
* In the context of a view, each column is identified by a name (alias) that is unique in this view.
* The new alias replaces the current one in the column references of the view.
*
* @warning The view must be writable.
*
* @param view A pointer on the view.
* @param current_name The current name that identifies the column in this view.
* @param alias The new name that should be used to identify the column in this view.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred: the view is read-only, the new alias already identifies
*            a column in the view, or no column is identified by current_name.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_view_create_column_alias(Obiview_p view, const char* current_name, const char* alias);
/**
* @brief Selects a line in the context of a view.
*
@ -450,22 +489,7 @@ int obi_close_view(Obiview_p view);
int obi_save_and_close_view(Obiview_p view);
/**
* @brief Closes the structure containing all the informations written in the view file.
*
* @param views A pointer on the view informations structure.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_view_infos(Obiviews_infos_all_p views);
// TODO in following functions would it be better to use column names instead of column pointers?
// in following functions would it be better to use column names instead of column pointers?
// check if it would be a gain or loss of time
/**