From 4df313c54aec78ee3eba50723d2af36c49e28372 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Thu, 25 Feb 2016 09:43:27 +0100 Subject: [PATCH] Added Obiviews specialized for the handling of nucleotide sequences --- python/obitools3/obidms/_obidms.pxd | 13 +- python/obitools3/obidms/_obidms.pyx | 204 +++++++++++++++++++++-- python/obitools3/obidms/capi/obiview.pxd | 48 +++++- src/obiview.c | 159 +++++++++++++++++- src/obiview.h | 29 +++- 5 files changed, 434 insertions(+), 19 deletions(-) diff --git a/python/obitools3/obidms/_obidms.pxd b/python/obitools3/obidms/_obidms.pxd index 653e5cd..d7df7d0 100644 --- a/python/obitools3/obidms/_obidms.pxd +++ b/python/obitools3/obidms/_obidms.pxd @@ -64,6 +64,15 @@ cdef class OBIView: cpdef save_and_close(self) +cdef class OBIView_NUC_SEQS(OBIView): + + cdef OBIDMS_column ids + cdef OBIDMS_column sequences + cdef OBIDMS_column descriptions + + cpdef delete_column(self, str column_name) + + cdef class OBIView_line : cdef index_t index @@ -77,5 +86,7 @@ cdef class OBIDMS: cpdef close(self) cpdef OBIView open_view(self, str view_name) - cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*) + cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*) + cpdef dict read_view_infos(self, str view_name) + cpdef dict read_views(self) diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx index d723e79..54c00ff 100644 --- a/python/obitools3/obidms/_obidms.pyx +++ b/python/obitools3/obidms/_obidms.pyx @@ -48,16 +48,26 @@ from ._obidmscolumn_seq cimport OBIDMS_column_seq, \ from ._obidms cimport OBIView, OBIView_line from .capi.obiview cimport Obiview_p, \ + Obiviews_infos_all_p, \ + Obiview_infos_p, \ + Column_reference_p, \ + obi_new_view_nuc_seqs, \ obi_new_view, \ obi_new_view_cloned_from_name, \ + obi_new_view_nuc_seqs_cloned_from_name, \ obi_open_view, \ + obi_read_views, \ obi_view_delete_column, \ obi_view_add_column, \ obi_view_get_column, \ obi_view_get_pointer_on_column_in_view, \ obi_select_line, \ obi_select_lines, \ - obi_save_and_close_view + obi_save_and_close_view, \ + VIEW_TYPE_NUC_SEQS, \ + NUC_SEQUENCE_COLUMN, \ + ID_COLUMN, \ + DESCRIPTION_COLUMN from libc.stdlib cimport malloc from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer @@ -380,7 +390,6 @@ cdef class OBIView : raise Exception("Problem adding a column in a view") # Store the column pointer - column_pp = obi_view_get_pointer_on_column_in_view(self.pointer, column_name_b) if column_pp == NULL : raise Exception("Problem getting a column in a view") @@ -451,6 +460,112 @@ cdef class OBIView : ############################################# +cdef class OBIView_NUC_SEQS(OBIView): + + def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None): + + cdef Obiview_p view = NULL + cdef int i + cdef list col_list + cdef str col_name + cdef OBIDMS_column column + cdef OBIDMS_column_p column_p + cdef OBIDMS_column_p* column_pp + cdef OBIDMS_column_header_p header + cdef index_t* line_selection_p + + self.dms = dms + + if line_selection is not None : + line_selection_p = malloc((len(line_selection) + 1) * sizeof(index_t)) + for i in range(len(line_selection)) : + line_selection_p[i] = line_selection[i] # TODO type problem? + line_selection_p[len(line_selection)] = -1 + else : + line_selection_p = NULL + + if new : + if view_to_clone is not None : + if type(view_to_clone) == str : + view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p) + else : + view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), ( view_to_clone).pointer, line_selection_p) + elif view_to_clone is None : + view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p) + elif not new : + if view_name is not None : + view = obi_open_view(dms.pointer, str2bytes(view_name)) + elif view_name is None : + view = obi_open_view(dms.pointer, NULL) + + if view == NULL : + raise Exception("Error creating/opening view") + + self.pointer = view + self.name = bytes2str(view.name) + + # go through columns to build list and open python object (TODO make separate function?) + self.columns = {} + self.columns_pp = {} + + i = 0 + while i < view.column_count : + column_pp = ((view.columns)+i) + column_p = (view.columns)[i] + header = (column_p).header + + col_name = bytes2str(header.name) + col_capsule = PyCapsule_New(column_pp, NULL, NULL) # TODO discuss + (self.columns_pp)[col_name] = col_capsule + + subclass = OBIDMS_column.get_subclass_type(column_p) + self.columns[col_name] = subclass(self, col_name) + + i+=1 + + self.ids = self.columns[bytes2str(ID_COLUMN)] + self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)] + self.descriptions = self.columns[bytes2str(DESCRIPTION_COLUMN)] + + + cpdef delete_column(self, str column_name) : + + cdef int i + cdef Obiview_p view + cdef OBIDMS_column column + cdef OBIDMS_column_p column_p + cdef OBIDMS_column_p* column_pp + cdef OBIDMS_column_header_p header + cdef str column_n + + if ((column_name == bytes2str(ID_COLUMN)) or (column_name == bytes2str(NUC_SEQUENCE_COLUMN)) or (column_name == bytes2str(DESCRIPTION_COLUMN))) : + raise Exception("Can't delete an obligatory column from a NUC_SEQS view") + + view = self.pointer + + if obi_view_delete_column(view, str2bytes(column_name)) < 0 : + raise Exception("Problem deleting a column from a view") + + # Update the dictionaries of column pointers and column objects, and update pointers in column objects (make function?): + (self.columns).pop(column_name) + (self.columns_pp).pop(column_name) + + i = 0 + while i < view.column_count : + column_pp = ((view.columns)+i) + column_p = (view.columns)[i] + header = (column_p).header + col_name = bytes2str(header.name) + col_capsule = PyCapsule_New(column_pp, NULL, NULL) + (self.columns_pp)[col_name] = col_capsule + i+=1 + + for column_n in self.columns : + (self.columns[column_n]).update_pointer() + + +############################################# + cdef class OBIView_line : def __init__(self, OBIView view, index_t line_nb) : @@ -520,12 +635,81 @@ cdef class OBIDMS : cpdef close(self) : if (obi_close_dms(self.pointer)) < 0 : raise Exception("Problem closing an OBIDMS") - - - cpdef OBIView open_view(self, str view_name) : - return OBIView(self, view_name) - - - cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None) : - return OBIView(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection) + + cpdef OBIView open_view(self, str view_name) : + + cdef object view_class + cdef dict view_infos + + view_infos = self.read_view_infos(view_name) + + if view_infos["view_type"] == bytes2str(VIEW_TYPE_NUC_SEQS) : # TODO not gonna work + view_class = OBIView_NUC_SEQS + else : + view_class = OBIView + + return view_class(self, view_name) + + + cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None) : + + cdef object view_class + + if view_type is not None : + if view_type == bytes2str(VIEW_TYPE_NUC_SEQS) : # TODO not gonna work + view_class = OBIView_NUC_SEQS + else : + view_class = OBIView + + return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection) + + + cpdef dict read_view_infos(self, str view_name) : + all_views = self.read_views() + return all_views[view_name] + + + cpdef dict read_views(self) : # TODO function that gets 1 view with name, function that prints the dic and function that prints 1 view. Add column type in col ref + + cdef Obiviews_infos_all_p all_views_p + cdef Obiview_infos_p view_p + cdef Column_reference_p column_refs + cdef int nb_views + cdef int i, j + cdef str view_name + cdef str column_name + cdef dict views + cdef bytes name_b + + views = {} + all_views_p = obi_read_views(self.pointer) + nb_views = (all_views_p.header).view_count + for i in range(nb_views) : + view_p = ( (all_views_p.view_infos)) + i + view_name = bytes2str(view_p.name) + views[view_name] = {} + views[view_name]["view_type"] = bytes2str(view_p.view_type) + views[view_name]["column_count"] = view_p.column_count + views[view_name]["line_count"] = view_p.line_count + views[view_name]["view_number"] = view_p.view_number + views[view_name]["created_from"] = bytes2str(view_p.created_from) + views[view_name]["creation_date"] = bytes2str(obi_column_format_date(view_p.creation_date)) # TODO move this function in utils or somethings + if (view_p.all_lines) : + views[view_name]["line_selection"] = None + else : + views[view_name]["line_selection"] = {} + views[view_name]["line_selection"]["column_name"] = bytes2str((view_p.line_selection).column_name) + views[view_name]["line_selection"]["version"] = (view_p.line_selection).version + views[view_name]["column_references"] = {} + column_refs = view_p.column_references + for j in range(views[view_name]["column_count"]) : + column_name = bytes2str((column_refs[j]).column_name) + views[view_name]["column_references"][column_name] = {} + views[view_name]["column_references"][column_name]["version"] = column_refs[j].version + + return views + + + + \ No newline at end of file diff --git a/python/obitools3/obidms/capi/obiview.pxd b/python/obitools3/obidms/capi/obiview.pxd index b5df3b6..3733eed 100644 --- a/python/obitools3/obidms/capi/obiview.pxd +++ b/python/obitools3/obidms/capi/obiview.pxd @@ -3,15 +3,21 @@ from .obitypes cimport const_char_p, \ OBIType_t, \ obiversion_t, \ - index_t + index_t, \ + time_t from ..capi.obidms cimport OBIDMS_p from ..capi.obidmscolumn cimport OBIDMS_column_p cdef extern from "obiview.h" nogil: + extern const_char_p VIEW_TYPE_NUC_SEQS + extern const_char_p NUC_SEQUENCE_COLUMN + extern const_char_p ID_COLUMN + extern const_char_p DESCRIPTION_COLUMN + struct Column_reference_t : - char column_name + const_char_p column_name obiversion_t version ctypedef Column_reference_t* Column_reference_p @@ -31,12 +37,48 @@ cdef extern from "obiview.h" nogil: ctypedef Obiview_t* Obiview_p + struct Obiview_infos_t : + int view_number + int column_count + index_t line_count + const_char_p name + const_char_p created_from + time_t creation_date + bint all_lines + Column_reference_t line_selection + Column_reference_p column_references + const_char_p view_type + + ctypedef Obiview_infos_t* Obiview_infos_p + + + struct Obiviews_header_t : + size_t header_size + size_t views_size + int view_count + + ctypedef Obiviews_header_t* Obiviews_header_p + + + struct Obiviews_infos_all_t : + Obiviews_header_p header + Obiview_infos_p view_infos + + ctypedef Obiviews_infos_all_t* Obiviews_infos_all_p + + + Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection) + Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection) - Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection) + Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection) + Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection) + Obiview_p obi_open_view(OBIDMS_p dms, const_char_p view_name) + Obiviews_infos_all_p obi_read_views(OBIDMS_p dms) + int obi_view_add_column(Obiview_p view, const_char_p column_name, obiversion_t version_number, diff --git a/src/obiview.c b/src/obiview.c index f27670d..f68d82b 100644 --- a/src/obiview.c +++ b/src/obiview.c @@ -209,6 +209,50 @@ int create_obiview_file(int dms_file_descriptor) **********************************************************************/ +Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection) +{ + Obiview_p view; + + if (view_to_clone != NULL) + { // TODO check that the view to clone is already a NUC_SEQS view (discuss possibility of transforming type of a view) + if (strcmp(view_to_clone->view_type, VIEW_TYPE_NUC_SEQS)) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "Trying to clone a non-NUC SEQS view to create a NUC SEQS view"); + return NULL; + } + } + + view = obi_new_view(dms, view_name, view_to_clone, line_selection); + + strcpy(view->view_type, VIEW_TYPE_NUC_SEQS); + + if (view_to_clone == NULL) + { + // Adding sequence column + if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, OBI_SEQ, 0, 1, NUC_SEQUENCE_COLUMN, NUC_SEQUENCE_AVL, "Nucleotide sequences", true) < 0) + { + obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); + return NULL; + } + // Adding id column + if (obi_view_add_column(view, ID_COLUMN, -1, OBI_STR, 0, 1, ID_COLUMN, ID_AVL, "Ids", true) < 0) + { + obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); + return NULL; + } + // Adding description column + if (obi_view_add_column(view, DESCRIPTION_COLUMN, -1, OBI_STR, 0, 1, DESCRIPTION_COLUMN, DESCRIPTION_AVL, "Descriptions", true) < 0) + { + obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); + return NULL; + } + } + + return view; +} + + Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection) { Obiview_p view; @@ -297,6 +341,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl } } + strcpy(view->view_type, view_to_clone->view_type); strcpy(view->created_from, view_to_clone->name); view->new_line_selection = NULL; } @@ -308,8 +353,9 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl view->line_count = 0; view->line_selection = NULL; view->new_line_selection = NULL; - (view->created_from)[0] = '\0'; - //view->columns = NULL; TODO + (view->created_from)[0] = '\0'; + (view->view_type)[0] = '\0'; + //view->columns = NULL; TODO } strcpy(view->name, view_name); @@ -328,6 +374,25 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con if (view_to_clone == NULL) return NULL; view = obi_new_view(dms, view_name, view_to_clone, line_selection); + + obi_close_view(view_to_clone); + + return view; +} + + +Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection) +{ + Obiview_p view; + Obiview_p view_to_clone; + + view_to_clone = obi_open_view(dms, view_to_clone_name); + if (view_to_clone == NULL) + return NULL; + view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection); + + obi_close_view(view_to_clone); + return view; } @@ -466,6 +531,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name) view->line_count = view_infos->line_count; strcpy(view->created_from, view_infos->created_from); strcpy(view->name, view_infos->name); + strcpy(view->view_type, view_infos->view_type); // Open the columns to read for (i=0; i<(view_infos->column_count); i++) @@ -493,6 +559,91 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name) } +Obiviews_infos_all_p obi_read_views(OBIDMS_p dms) +{ + char* view_file_name; + int obiview_file_descriptor; + size_t header_size; + Obiviews_header_p header; + Obiview_infos_p view_infos; + Obiviews_infos_all_p views; + + view_file_name = build_obiview_file_name(); + if (view_file_name == NULL) + return NULL; + + // Open view file, read header size and map header and views + obiview_file_descriptor = openat(dms->dir_fd, view_file_name, O_RDWR, 0777); + if (obiview_file_descriptor < 0) + { // No views yet + free(view_file_name); + return NULL; + } + + free(view_file_name); + + // Read the header size + if (read(obiview_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t))) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError reading the header size of an obiview file (trying to open a view when there are none?)"); + close(obiview_file_descriptor); + return NULL; + } + + // Map the header + header = mmap(NULL, + header_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + obiview_file_descriptor, + 0 + ); + if (header == MAP_FAILED) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError mmapping an obiview file header"); + close(obiview_file_descriptor); + return NULL; + } + + // Map the views + view_infos = mmap(NULL, + header->views_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + obiview_file_descriptor, + header_size + ); + if (view_infos == MAP_FAILED) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError mmapping the views from an obiview file"); + munmap(header, header_size); + close(obiview_file_descriptor); + return NULL; + } + + views = (Obiviews_infos_all_p) malloc(sizeof(Obiviews_infos_all_t)); + if (views == NULL) + { + obi_set_errno(OBIVIEW_ERROR); + obidebug(1, "\nError mmapping the views from an obiview file"); + munmap(view_infos, header->views_size); + munmap(header, header_size); + close(obiview_file_descriptor); + return NULL; + } + + views->header = header; + views->view_infos = view_infos; + + close(obiview_file_descriptor); + + return views; +} + + int obi_view_add_column(Obiview_p view, const char* column_name, obiversion_t version_number, @@ -605,6 +756,8 @@ int obi_view_delete_column(Obiview_p view, const char* column_name) int i; bool found; + // TODO check that not deleting an obligatory column? + // Check that the view is not read-only if (view->read_only) { @@ -879,6 +1032,8 @@ int obi_save_view(Obiview_p view) view_infos->creation_date = time(NULL); strcpy(view_infos->created_from, view->created_from); strcpy(view_infos->name, view->name); + strcpy(view_infos->view_type, view->view_type); + // Store reference for the line selection associated with that view if (view->new_line_selection != NULL) { diff --git a/src/obiview.h b/src/obiview.h index d006ef3..2f5eb89 100644 --- a/src/obiview.h +++ b/src/obiview.h @@ -30,6 +30,19 @@ */ #define OBIVIEW_FILE_NAME "obiviews" +#define VIEW_TYPE_MAX_NAME (1024) + +#define VIEW_TYPE_NUC_SEQS "NUC_SEQS_VIEW" + +#define NUC_SEQUENCE_COLUMN "NUC_SEQ" +#define NUC_SEQUENCE_AVL "NUC_SEQ_AVL" + +#define ID_COLUMN "ID" +#define ID_AVL "ID_AVL" + +#define DESCRIPTION_COLUMN "DESCRIPTION" +#define DESCRIPTION_AVL "DESCRIPTION_AVL" + #define LINES_COLUMN_NAME "LINES" @@ -69,6 +82,8 @@ typedef struct Obiview { int view_number; + char view_type[VIEW_TYPE_MAX_NAME+1]; + } Obiview_t, *Obiview_p; @@ -94,6 +109,8 @@ typedef struct Obiview_infos { Column_reference_t column_references[MAX_NB_OPENED_COLUMNS]; + char view_type[VIEW_TYPE_MAX_NAME+1]; + } Obiview_infos_t, *Obiview_infos_p; @@ -110,20 +127,26 @@ typedef struct Obiviews_header { /** * @brief . */ -typedef struct Obiviews { // not used +typedef struct Obiviews_infos_all { Obiviews_header_p header; - Obiview_infos_p views; + Obiview_infos_p view_infos; -} Obiviews_t, *Obiviews_p; +} Obiviews_infos_all_t, *Obiviews_infos_all_p; +Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection); + Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection); Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection); +Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection); + Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name); +Obiviews_infos_all_p obi_read_views(OBIDMS_p dms); + int obi_view_add_column(Obiview_p view, const char* column_name, obiversion_t version_number,