Quality columns are now optional in NUC_SEQS views + minor fixes

This commit is contained in:
Celine Mercier
2016-08-16 15:17:26 +02:00
parent cf839522e7
commit e4129610cf
8 changed files with 54 additions and 52 deletions

View File

@ -108,6 +108,8 @@ def run(config):
# Call cython alignment function # Call cython alignment function
iview.align(oview) iview.align(oview)
print(oview.__repr__())
iview.save_and_close() iview.save_and_close()
oview.save_and_close() oview.save_and_close()
d.close() d.close()

View File

@ -75,6 +75,8 @@ def run(config):
# Create output view with the line selection # Create output view with the line selection
oview = d.new_view(config['obi']['outputview'], view_to_clone=iview, line_selection=selection, comments="obi grep: "+config['grep']['predicate']+"\n") oview = d.new_view(config['obi']['outputview'], view_to_clone=iview, line_selection=selection, comments="obi grep: "+config['grep']['predicate']+"\n")
print(oview.__repr__())
iview.save_and_close() iview.save_and_close()
oview.save_and_close() oview.save_and_close()
d.close() d.close()

View File

@ -43,8 +43,6 @@ def addOptions(parser):
required=True, required=True,
help="Name of the default DMS for reading and writing data") help="Name of the default DMS for reading and writing data")
group=parser.add_argument_group('obi import specific options')
group.add_argument('--skip', group.add_argument('--skip',
action="store", dest="import:skip", action="store", dest="import:skip",
metavar='<N>', metavar='<N>',
@ -90,8 +88,9 @@ def addOptions(parser):
# TODO: Handling of NA values
def run(config): def run(config):
pb = ProgressBar(35000000,config,seconde=5) #pb = ProgressBar(35000000, config, seconde=5)
inputs = uopen(config['import']['filename']) inputs = uopen(config['import']['filename'])
@ -106,34 +105,27 @@ def run(config):
else: else:
raise RuntimeError('No file format specified') raise RuntimeError('No file format specified')
# Temporary way to handle NA values
#NA_list = ["nan"]
# Create DMS # Create DMS
d = OBIDMS(config['obi']['defaultdms']) d = OBIDMS(config['obi']['defaultdms'])
# Create view # Create view
view = d.new_view(config['import']['destview'], view_type=view_type) view = d.new_view(config['import']['destview'], view_type=view_type, quality_column=get_quality)
i = 0 i = 0
for seq in iseq: for seq in iseq:
pb(i) #pb(i)
view[i].set_id(seq['id']) view[i].set_id(seq['id'])
view[i].set_definition(seq['definition']) view[i].set_definition(seq['definition'])
view[i].set_sequence(seq['sequence']) view[i].set_sequence(seq['sequence'])
if get_quality : if get_quality :
view[i].set_quality(seq['quality']) view[i].set_quality(seq['quality'])
for tag in seq['tags'] : for tag in seq['tags'] :
#print(tag, seq['tags'][tag])
#if seq['tags'][tag] not in NA_list :
view[i][tag] = seq['tags'][tag] view[i][tag] = seq['tags'][tag]
i+=1 i+=1
#print(i)
print(view.__repr__()) print(view.__repr__())
view.save_and_close() view.save_and_close()
d.close() d.close()
print("Done.") print("Done.")

View File

@ -71,11 +71,6 @@ cdef class OBIView:
cdef class OBIView_NUC_SEQS(OBIView): cdef class OBIView_NUC_SEQS(OBIView):
cdef OBIDMS_column ids
cdef OBIDMS_column sequences
cdef OBIDMS_column definitions
cdef OBIDMS_column qualities
cpdef delete_column(self, str column_name) cpdef delete_column(self, str column_name)
cpdef align(self, cpdef align(self,
OBIView oview, OBIView oview,
@ -101,7 +96,7 @@ cdef class OBIDMS:
cpdef close(self) cpdef close(self)
cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name) cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name)
cpdef OBIView open_view(self, str view_name) cpdef OBIView open_view(self, str view_name)
cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*) cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*, bint quality_column=*)
cpdef dict read_view_infos(self, str view_name) cpdef dict read_view_infos(self, str view_name)
# cpdef dict read_views(self) TODO # cpdef dict read_views(self) TODO

View File

@ -247,7 +247,8 @@ cdef class OBIDMS_column_line :
cdef class OBIView : cdef class OBIView :
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""): def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments="", bint quality_column=False):
# TODO: quality_column is only accepted here because OBIView_NUC_SEQS views need it; this is not a clean design
cdef Obiview_p view = NULL cdef Obiview_p view = NULL
cdef int i cdef int i
@ -466,7 +467,7 @@ cdef class OBIView :
cdef class OBIView_NUC_SEQS(OBIView): cdef class OBIView_NUC_SEQS(OBIView):
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""): def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments="", bint quality_column=False):
cdef Obiview_p view = NULL cdef Obiview_p view = NULL
cdef int i cdef int i
@ -490,11 +491,11 @@ cdef class OBIView_NUC_SEQS(OBIView):
if new : if new :
if view_to_clone is not None : if view_to_clone is not None :
if type(view_to_clone) == str : if type(view_to_clone) == str :
view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments)) view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments), quality_column)
else : else :
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), (<OBIView> view_to_clone).pointer, line_selection_p, str2bytes(comments)) view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), (<OBIView> view_to_clone).pointer, line_selection_p, str2bytes(comments), quality_column)
elif view_to_clone is None : elif view_to_clone is None :
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments)) view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments), quality_column)
elif not new : elif not new :
if view_name is not None : if view_name is not None :
view = obi_open_view(dms.pointer, str2bytes(view_name)) view = obi_open_view(dms.pointer, str2bytes(view_name))
@ -517,11 +518,6 @@ cdef class OBIView_NUC_SEQS(OBIView):
subclass = OBIDMS_column.get_subclass_type(column_p) subclass = OBIDMS_column.get_subclass_type(column_p)
self.columns[col_name] = subclass(self, col_name) self.columns[col_name] = subclass(self, col_name)
self.ids = self.columns[bytes2str(ID_COLUMN)]
self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)]
self.definitions = self.columns[bytes2str(DEFINITION_COLUMN)]
self.qualities = self.columns[bytes2str(QUALITY_COLUMN)]
def __getitem__(self, object item) : def __getitem__(self, object item) :
if type(item) == str : if type(item) == str :
return (self.columns)[item] return (self.columns)[item]
@ -690,7 +686,7 @@ cdef class OBIDMS :
return view_class(self, view_name) return view_class(self, view_name)
cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="") : cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="", bint quality_column=False) :
cdef object view_class cdef object view_class
@ -706,7 +702,7 @@ cdef class OBIDMS :
isinstance(view_to_clone, OBIView_NUC_SEQS)) : isinstance(view_to_clone, OBIView_NUC_SEQS)) :
view_class = OBIView_NUC_SEQS view_class = OBIView_NUC_SEQS
return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments) return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments, quality_column=quality_column)
cpdef dict read_view_infos(self, str view_name) : cpdef dict read_view_infos(self, str view_name) :

View File

@ -61,13 +61,13 @@ cdef extern from "obiview.h" nogil:
ctypedef Obiview_t* Obiview_p ctypedef Obiview_t* Obiview_p
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments) Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments, bint quality_column)
Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments) Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments) Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments) Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column)
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name) Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)

View File

@ -1134,7 +1134,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
} }
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments) Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments, bool quality_column)
{ {
Obiview_p view; Obiview_p view;
OBIDMS_column_p associated_nuc_column; OBIDMS_column_p associated_nuc_column;
@ -1176,30 +1176,41 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
return NULL; return NULL;
} }
// Adding quality column // Adding quality column
associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN); if (quality_column)
if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
{ {
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN);
return NULL; if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
{
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
} }
} }
// Add predicate functions of the view type // Add predicate functions of the view type
view->nb_predicates = 5; // TODO macro? // TODO macros?
if (quality_column)
view->nb_predicates = 5;
else
view->nb_predicates = 3;
view->predicate_functions = malloc((view->nb_predicates) * sizeof(char* (*) (bool))); view->predicate_functions = malloc((view->nb_predicates) * sizeof(char* (*) (bool)));
(view->predicate_functions)[0] = view_has_nuc_sequence_column; (view->predicate_functions)[0] = view_has_nuc_sequence_column;
(view->predicate_functions)[1] = view_has_quality_column; (view->predicate_functions)[1] = view_has_id_column;
(view->predicate_functions)[2] = view_has_id_column; (view->predicate_functions)[2] = view_has_definition_column;
(view->predicate_functions)[3] = view_has_definition_column; if (quality_column)
(view->predicate_functions)[4] = view_check_quality_matches_seq_column; {
(view->predicate_functions)[3] = view_has_quality_column;
(view->predicate_functions)[4] = view_check_quality_matches_seq_column;
}
return view; return view;
} }
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments) Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments, bool quality_column)
{ {
Obiview_p view; Obiview_p view;
Obiview_p view_to_clone; Obiview_p view_to_clone;
@ -1207,7 +1218,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
view_to_clone = obi_open_view(dms, view_to_clone_name); view_to_clone = obi_open_view(dms, view_to_clone_name);
if (view_to_clone == NULL) if (view_to_clone == NULL)
return NULL; return NULL;
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments); view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column);
obi_close_view(view_to_clone); obi_close_view(view_to_clone);

View File

@ -183,6 +183,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored * - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
* - ID_COLUMN where sequence identifiers are stored * - ID_COLUMN where sequence identifiers are stored
* - DEFINITION_COLUMN where sequence definitions are stored * - DEFINITION_COLUMN where sequence definitions are stored
* And, if quality_column is True:
* - QUALITY_COLUMN where sequence qualities are stored * - QUALITY_COLUMN where sequence qualities are stored
* *
* @param dms A pointer on the OBIDMS. * @param dms A pointer on the OBIDMS.
@ -191,6 +192,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
* @param line_selection Optionally, a pointer to a list of indexes corresponding to a line selection to use with the view to clone * @param line_selection Optionally, a pointer to a list of indexes corresponding to a line selection to use with the view to clone
* if there is one. NULL if there is no line selection or no view to clone. * if there is one. NULL if there is no line selection or no view to clone.
* @param comments Optionally, comments to associate with the view. NULL if there are none. * @param comments Optionally, comments to associate with the view. NULL if there are none.
* @param quality_column Whether or not a sequence quality column is associated with the view.
* *
* @returns A pointer to the newly created view structure. * @returns A pointer to the newly created view structure.
* @retval NULL if an error occurred. * @retval NULL if an error occurred.
@ -198,7 +200,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
* @since February 2016 * @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments); Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments, bool quality_column);
/** /**
@ -215,6 +217,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored * - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
* - ID_COLUMN where sequence identifiers are stored * - ID_COLUMN where sequence identifiers are stored
* - DEFINITION_COLUMN where sequence definitions are stored * - DEFINITION_COLUMN where sequence definitions are stored
* And, if quality_column is True:
* - QUALITY_COLUMN where sequence qualities are stored * - QUALITY_COLUMN where sequence qualities are stored
* *
* @param dms A pointer on the OBIDMS. * @param dms A pointer on the OBIDMS.
@ -223,6 +226,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
* @param line_selection Optionally, a pointer to a list of indexes corresponding to a line selection to use with the view to clone * @param line_selection Optionally, a pointer to a list of indexes corresponding to a line selection to use with the view to clone
* if there is one. NULL if there is no line selection or no view to clone. * if there is one. NULL if there is no line selection or no view to clone.
* @param comments Optionally, comments to associate with the view. NULL if there are none. * @param comments Optionally, comments to associate with the view. NULL if there are none.
* @param quality_column Whether or not a sequence quality column is associated with the view.
* *
* @returns A pointer to the newly created view structure. * @returns A pointer to the newly created view structure.
* @retval NULL if an error occurred. * @retval NULL if an error occurred.
@ -230,7 +234,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
* @since February 2016 * @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org) * @author Celine Mercier (celine.mercier@metabarcoding.org)
*/ */
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments); Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments, bool quality_column);
/** /**