From e4129610cf9a42118f161c02368c9d33cd07be03 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Tue, 16 Aug 2016 15:17:26 +0200 Subject: [PATCH] Quality columns are now optional in NUC_SEQS views + minor fixes --- python/obitools3/commands/align.pyx | 2 ++ python/obitools3/commands/grep.pyx | 2 ++ python/obitools3/commands/import.pyx | 24 ++++++---------- python/obitools3/obidms/_obidms.pxd | 7 +---- python/obitools3/obidms/_obidms.pyx | 24 +++++++--------- python/obitools3/obidms/capi/obiview.pxd | 4 +-- src/obiview.c | 35 ++++++++++++++++-------- src/obiview.h | 8 ++++-- 8 files changed, 54 insertions(+), 52 deletions(-) diff --git a/python/obitools3/commands/align.pyx b/python/obitools3/commands/align.pyx index cfc6e61..4adb507 100644 --- a/python/obitools3/commands/align.pyx +++ b/python/obitools3/commands/align.pyx @@ -108,6 +108,8 @@ def run(config): # Call cython alignment function iview.align(oview) + print(oview.__repr__()) + iview.save_and_close() oview.save_and_close() d.close() diff --git a/python/obitools3/commands/grep.pyx b/python/obitools3/commands/grep.pyx index 1161e51..f64fe13 100644 --- a/python/obitools3/commands/grep.pyx +++ b/python/obitools3/commands/grep.pyx @@ -75,6 +75,8 @@ def run(config): # Create output view with the line selection oview = d.new_view(config['obi']['outputview'], view_to_clone=iview, line_selection=selection, comments="obi grep: "+config['grep']['predicate']+"\n") + print(oview.__repr__()) + iview.save_and_close() oview.save_and_close() d.close() diff --git a/python/obitools3/commands/import.pyx b/python/obitools3/commands/import.pyx index fb41c83..7cc753c 100644 --- a/python/obitools3/commands/import.pyx +++ b/python/obitools3/commands/import.pyx @@ -42,8 +42,6 @@ def addOptions(parser): type=str, required=True, help="Name of the default DMS for reading and writing data") - - group=parser.add_argument_group('obi import specific options') group.add_argument('--skip', action="store", dest="import:skip", @@ -88,10 +86,11 @@ def addOptions(parser): const='pep', help="Input file contains protein sequences") - - + + +# TODO: Handling of NA values def run(config): - pb = ProgressBar(35000000,config,seconde=5) + #pb = ProgressBar(35000000, config, seconde=5) inputs = uopen(config['import']['filename']) @@ -105,35 +104,28 @@ def run(config): get_quality = True else: raise RuntimeError('No file format specified') - - # Temporary way to handle NA values - #NA_list = ["nan"] - + # Create DMS d = OBIDMS(config['obi']['defaultdms']) # Create view - view = d.new_view(config['import']['destview'], view_type=view_type) + view = d.new_view(config['import']['destview'], view_type=view_type, quality_column=get_quality) i = 0 for seq in iseq: - pb(i) + #pb(i) view[i].set_id(seq['id']) view[i].set_definition(seq['definition']) view[i].set_sequence(seq['sequence']) if get_quality : view[i].set_quality(seq['quality']) for tag in seq['tags'] : - #print(tag, seq['tags'][tag]) - #if seq['tags'][tag] not in NA_list : view[i][tag] = seq['tags'][tag] i+=1 - #print(i) print(view.__repr__()) - view.save_and_close() + view.save_and_close() d.close() print("Done.") - \ No newline at end of file diff --git a/python/obitools3/obidms/_obidms.pxd b/python/obitools3/obidms/_obidms.pxd index 93c775d..3c88c01 100644 --- a/python/obitools3/obidms/_obidms.pxd +++ b/python/obitools3/obidms/_obidms.pxd @@ -70,11 +70,6 @@ cdef class OBIView: cdef class OBIView_NUC_SEQS(OBIView): - - cdef OBIDMS_column ids - cdef OBIDMS_column sequences - cdef OBIDMS_column definitions - cdef OBIDMS_column qualities cpdef delete_column(self, str column_name) cpdef align(self, @@ -101,7 +96,7 @@ cdef class OBIDMS: cpdef close(self) cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name) cpdef OBIView open_view(self, str view_name) - cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*) + cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*, bint quality_column=*) cpdef dict read_view_infos(self, str view_name) # cpdef dict read_views(self) TODO diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx index cbf73ad..23267af 100644 --- a/python/obitools3/obidms/_obidms.pyx +++ b/python/obitools3/obidms/_obidms.pyx @@ -247,7 +247,8 @@ cdef class OBIDMS_column_line : cdef class OBIView : - def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""): + def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments="", bint quality_column=False): + # TODO quality_column is only here because it's needed for OBIView_NUC_SEQS views, not clean cdef Obiview_p view = NULL cdef int i @@ -466,7 +467,7 @@ cdef class OBIView : cdef class OBIView_NUC_SEQS(OBIView): - def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""): + def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments="", bint quality_column=False): cdef Obiview_p view = NULL cdef int i @@ -490,11 +491,11 @@ cdef class OBIView_NUC_SEQS(OBIView): if new : if view_to_clone is not None : if type(view_to_clone) == str : - view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments)) + view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments), quality_column) else : - view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), ( view_to_clone).pointer, line_selection_p, str2bytes(comments)) + view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), ( view_to_clone).pointer, line_selection_p, str2bytes(comments), quality_column) elif view_to_clone is None : - view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments)) + view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments), quality_column) elif not new : if view_name is not None : view = obi_open_view(dms.pointer, str2bytes(view_name)) @@ -516,12 +517,7 @@ cdef class OBIView_NUC_SEQS(OBIView): col_name = bytes2str(view.infos.column_references[i].alias) subclass = OBIDMS_column.get_subclass_type(column_p) self.columns[col_name] = subclass(self, col_name) - - self.ids = self.columns[bytes2str(ID_COLUMN)] - self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)] - self.definitions = self.columns[bytes2str(DEFINITION_COLUMN)] - self.qualities = self.columns[bytes2str(QUALITY_COLUMN)] - + def __getitem__(self, object item) : if type(item) == str : return (self.columns)[item] @@ -690,7 +686,7 @@ cdef class OBIDMS : return view_class(self, view_name) - cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="") : + cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="", bint quality_column=False) : cdef object view_class @@ -706,8 +702,8 @@ cdef class OBIDMS : isinstance(view_to_clone, OBIView_NUC_SEQS)) : view_class = OBIView_NUC_SEQS - return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments) - + return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments, quality_column=quality_column) + cpdef dict read_view_infos(self, str view_name) : diff --git a/python/obitools3/obidms/capi/obiview.pxd b/python/obitools3/obidms/capi/obiview.pxd index efd8f48..0c36699 100644 --- a/python/obitools3/obidms/capi/obiview.pxd +++ b/python/obitools3/obidms/capi/obiview.pxd @@ -61,13 +61,13 @@ cdef extern from "obiview.h" nogil: ctypedef Obiview_t* Obiview_p - Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments) + Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments, bint quality_column) Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments) Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments) - Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments) + Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column) Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name) diff --git a/src/obiview.c b/src/obiview.c index 4821211..2cc8f33 100644 --- a/src/obiview.c +++ b/src/obiview.c @@ -1134,7 +1134,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con } -Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments) +Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments, bool quality_column) { Obiview_p view; OBIDMS_column_p associated_nuc_column; @@ -1176,30 +1176,41 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v return NULL; } // Adding quality column - associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN); - if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association + if (quality_column) { - obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); - return NULL; + associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN); + if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association + { + obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); + return NULL; + } } } // Add predicate functions of the view type - view->nb_predicates = 5; // TODO macro? + // TODO macros? + + if (quality_column) + view->nb_predicates = 5; + else + view->nb_predicates = 3; view->predicate_functions = malloc((view->nb_predicates) * sizeof(char* (*) (bool))); (view->predicate_functions)[0] = view_has_nuc_sequence_column; - (view->predicate_functions)[1] = view_has_quality_column; - (view->predicate_functions)[2] = view_has_id_column; - (view->predicate_functions)[3] = view_has_definition_column; - (view->predicate_functions)[4] = view_check_quality_matches_seq_column; + (view->predicate_functions)[1] = view_has_id_column; + (view->predicate_functions)[2] = view_has_definition_column; + if (quality_column) + { + (view->predicate_functions)[3] = view_has_quality_column; + (view->predicate_functions)[4] = view_check_quality_matches_seq_column; + } return view; } -Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments) +Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments, bool quality_column) { Obiview_p view; Obiview_p view_to_clone; @@ -1207,7 +1218,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_ view_to_clone = obi_open_view(dms, view_to_clone_name); if (view_to_clone == NULL) return NULL; - view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments); + view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column); obi_close_view(view_to_clone); diff --git a/src/obiview.h b/src/obiview.h index eefd4ef..0f5e975 100644 --- a/src/obiview.h +++ b/src/obiview.h @@ -183,6 +183,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con * - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored * - ID_COLUMN where sequence identifiers are stored * - DEFINITION_COLUMN where sequence definitions are stored + * And, if quality_column is True: * - QUALITY_COLUMN where sequence qualities are stored * * @param dms A pointer on the OBIDMS. @@ -191,6 +192,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con * @param line_selection Eventually a pointer on a list of indexes corresponding to a line selection to use with the view to clone * if there is one. NULL if there is no line selection or no view to clone. * @param comments Eventually, comments to associate with the view. NULL if not. + * @param quality_column Whether or not a sequence quality column is associated with the view. * * @returns A pointer to the newly created view structure. * @retval NULL if an error occurred. @@ -198,7 +200,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con * @since February 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments); +Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments, bool quality_column); /** @@ -215,6 +217,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v * - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored * - ID_COLUMN where sequence identifiers are stored * - DEFINITION_COLUMN where sequence definitions are stored + * And, if quality_column is True: * - QUALITY_COLUMN where sequence qualities are stored * * @param dms A pointer on the OBIDMS. @@ -223,6 +226,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v * @param line_selection Eventually a pointer on a list of indexes corresponding to a line selection to use with the view to clone * if there is one. NULL if there is no line selection or no view to clone. * @param comments Eventually, comments to associate with the view. NULL if not. + * @param quality_column Whether or not a sequence quality column is associated with the view. * * @returns A pointer to the newly created view structure. * @retval NULL if an error occurred. @@ -230,7 +234,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v * @since February 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments); +Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments, bool quality_column); /**