Quality columns are now optional in NUC_SEQS views + minor fixes

This commit is contained in:
Celine Mercier
2016-08-16 15:17:26 +02:00
parent cf839522e7
commit e4129610cf
8 changed files with 54 additions and 52 deletions

View File

@ -108,6 +108,8 @@ def run(config):
# Call cython alignment function
iview.align(oview)
print(oview.__repr__())
iview.save_and_close()
oview.save_and_close()
d.close()

View File

@ -75,6 +75,8 @@ def run(config):
# Create output view with the line selection
oview = d.new_view(config['obi']['outputview'], view_to_clone=iview, line_selection=selection, comments="obi grep: "+config['grep']['predicate']+"\n")
print(oview.__repr__())
iview.save_and_close()
oview.save_and_close()
d.close()

View File

@ -43,8 +43,6 @@ def addOptions(parser):
required=True,
help="Name of the default DMS for reading and writing data")
group=parser.add_argument_group('obi import specific options')
group.add_argument('--skip',
action="store", dest="import:skip",
metavar='<N>',
@ -90,8 +88,9 @@ def addOptions(parser):
# TODO: Handling of NA values
def run(config):
pb = ProgressBar(35000000,config,seconde=5)
#pb = ProgressBar(35000000, config, seconde=5)
inputs = uopen(config['import']['filename'])
@ -106,34 +105,27 @@ def run(config):
else:
raise RuntimeError('No file format specified')
# Temporary way to handle NA values
#NA_list = ["nan"]
# Create DMS
d = OBIDMS(config['obi']['defaultdms'])
# Create view
view = d.new_view(config['import']['destview'], view_type=view_type)
view = d.new_view(config['import']['destview'], view_type=view_type, quality_column=get_quality)
i = 0
for seq in iseq:
pb(i)
#pb(i)
view[i].set_id(seq['id'])
view[i].set_definition(seq['definition'])
view[i].set_sequence(seq['sequence'])
if get_quality :
view[i].set_quality(seq['quality'])
for tag in seq['tags'] :
#print(tag, seq['tags'][tag])
#if seq['tags'][tag] not in NA_list :
view[i][tag] = seq['tags'][tag]
i+=1
#print(i)
print(view.__repr__())
view.save_and_close()
d.close()
print("Done.")

View File

@ -71,11 +71,6 @@ cdef class OBIView:
cdef class OBIView_NUC_SEQS(OBIView):
cdef OBIDMS_column ids
cdef OBIDMS_column sequences
cdef OBIDMS_column definitions
cdef OBIDMS_column qualities
cpdef delete_column(self, str column_name)
cpdef align(self,
OBIView oview,
@ -101,7 +96,7 @@ cdef class OBIDMS:
cpdef close(self)
cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name)
cpdef OBIView open_view(self, str view_name)
cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*)
cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*, bint quality_column=*)
cpdef dict read_view_infos(self, str view_name)
# cpdef dict read_views(self) TODO

View File

@ -247,7 +247,8 @@ cdef class OBIDMS_column_line :
cdef class OBIView :
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""):
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments="", bint quality_column=False):
# TODO: quality_column is only accepted here because OBIView_NUC_SEQS views need it; this is not clean
cdef Obiview_p view = NULL
cdef int i
@ -466,7 +467,7 @@ cdef class OBIView :
cdef class OBIView_NUC_SEQS(OBIView):
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""):
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments="", bint quality_column=False):
cdef Obiview_p view = NULL
cdef int i
@ -490,11 +491,11 @@ cdef class OBIView_NUC_SEQS(OBIView):
if new :
if view_to_clone is not None :
if type(view_to_clone) == str :
view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments))
view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments), quality_column)
else :
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), (<OBIView> view_to_clone).pointer, line_selection_p, str2bytes(comments))
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), (<OBIView> view_to_clone).pointer, line_selection_p, str2bytes(comments), quality_column)
elif view_to_clone is None :
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments))
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments), quality_column)
elif not new :
if view_name is not None :
view = obi_open_view(dms.pointer, str2bytes(view_name))
@ -517,11 +518,6 @@ cdef class OBIView_NUC_SEQS(OBIView):
subclass = OBIDMS_column.get_subclass_type(column_p)
self.columns[col_name] = subclass(self, col_name)
self.ids = self.columns[bytes2str(ID_COLUMN)]
self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)]
self.definitions = self.columns[bytes2str(DEFINITION_COLUMN)]
self.qualities = self.columns[bytes2str(QUALITY_COLUMN)]
def __getitem__(self, object item) :
if type(item) == str :
return (self.columns)[item]
@ -690,7 +686,7 @@ cdef class OBIDMS :
return view_class(self, view_name)
cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="") :
cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="", bint quality_column=False) :
cdef object view_class
@ -706,7 +702,7 @@ cdef class OBIDMS :
isinstance(view_to_clone, OBIView_NUC_SEQS)) :
view_class = OBIView_NUC_SEQS
return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments)
return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments, quality_column=quality_column)
cpdef dict read_view_infos(self, str view_name) :

View File

@ -61,13 +61,13 @@ cdef extern from "obiview.h" nogil:
ctypedef Obiview_t* Obiview_p
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments, bint quality_column)
Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column)
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)

View File

@ -1134,7 +1134,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
}
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments)
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments, bool quality_column)
{
Obiview_p view;
OBIDMS_column_p associated_nuc_column;
@ -1176,6 +1176,8 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
return NULL;
}
// Adding quality column
if (quality_column)
{
associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN);
if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
{
@ -1183,23 +1185,32 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
return NULL;
}
}
}
// Add predicate functions of the view type
view->nb_predicates = 5; // TODO macro?
// TODO macros?
if (quality_column)
view->nb_predicates = 5;
else
view->nb_predicates = 3;
view->predicate_functions = malloc((view->nb_predicates) * sizeof(char* (*) (bool)));
(view->predicate_functions)[0] = view_has_nuc_sequence_column;
(view->predicate_functions)[1] = view_has_quality_column;
(view->predicate_functions)[2] = view_has_id_column;
(view->predicate_functions)[3] = view_has_definition_column;
(view->predicate_functions)[1] = view_has_id_column;
(view->predicate_functions)[2] = view_has_definition_column;
if (quality_column)
{
(view->predicate_functions)[3] = view_has_quality_column;
(view->predicate_functions)[4] = view_check_quality_matches_seq_column;
}
return view;
}
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments)
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments, bool quality_column)
{
Obiview_p view;
Obiview_p view_to_clone;
@ -1207,7 +1218,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
view_to_clone = obi_open_view(dms, view_to_clone_name);
if (view_to_clone == NULL)
return NULL;
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments);
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column);
obi_close_view(view_to_clone);

View File

@ -183,6 +183,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
* - ID_COLUMN where sequence identifiers are stored
* - DEFINITION_COLUMN where sequence definitions are stored
* And, if quality_column is True:
* - QUALITY_COLUMN where sequence qualities are stored
*
* @param dms A pointer to the OBIDMS.
@ -191,6 +192,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
* @param line_selection Optionally, a pointer to a list of indexes corresponding to a line selection to use with the view to clone
* if there is one. NULL if there is no line selection or no view to clone.
* @param comments Optionally, comments to associate with the view. NULL if there are none.
* @param quality_column Whether or not a sequence quality column is associated with the view.
*
* @returns A pointer to the newly created view structure.
* @retval NULL if an error occurred.
@ -198,7 +200,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments);
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments, bool quality_column);
/**
@ -215,6 +217,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
* - ID_COLUMN where sequence identifiers are stored
* - DEFINITION_COLUMN where sequence definitions are stored
* And, if quality_column is True:
* - QUALITY_COLUMN where sequence qualities are stored
*
* @param dms A pointer to the OBIDMS.
@ -223,6 +226,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
* @param line_selection Optionally, a pointer to a list of indexes corresponding to a line selection to use with the view to clone
* if there is one. NULL if there is no line selection or no view to clone.
* @param comments Optionally, comments to associate with the view. NULL if there are none.
* @param quality_column Whether or not a sequence quality column is associated with the view.
*
* @returns A pointer to the newly created view structure.
* @retval NULL if an error occurred.
@ -230,7 +234,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments);
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments, bool quality_column);
/**