Quality columns are now optional in NUC_SEQS views + minor fixes
This commit is contained in:
@ -108,6 +108,8 @@ def run(config):
|
||||
# Call cython alignment function
|
||||
iview.align(oview)
|
||||
|
||||
print(oview.__repr__())
|
||||
|
||||
iview.save_and_close()
|
||||
oview.save_and_close()
|
||||
d.close()
|
||||
|
@ -75,6 +75,8 @@ def run(config):
|
||||
# Create output view with the line selection
|
||||
oview = d.new_view(config['obi']['outputview'], view_to_clone=iview, line_selection=selection, comments="obi grep: "+config['grep']['predicate']+"\n")
|
||||
|
||||
print(oview.__repr__())
|
||||
|
||||
iview.save_and_close()
|
||||
oview.save_and_close()
|
||||
d.close()
|
||||
|
@ -43,8 +43,6 @@ def addOptions(parser):
|
||||
required=True,
|
||||
help="Name of the default DMS for reading and writing data")
|
||||
|
||||
group=parser.add_argument_group('obi import specific options')
|
||||
|
||||
group.add_argument('--skip',
|
||||
action="store", dest="import:skip",
|
||||
metavar='<N>',
|
||||
@ -90,8 +88,9 @@ def addOptions(parser):
|
||||
|
||||
|
||||
|
||||
# TODO: Handling of NA values
|
||||
def run(config):
|
||||
pb = ProgressBar(35000000,config,seconde=5)
|
||||
#pb = ProgressBar(35000000, config, seconde=5)
|
||||
|
||||
inputs = uopen(config['import']['filename'])
|
||||
|
||||
@ -106,34 +105,27 @@ def run(config):
|
||||
else:
|
||||
raise RuntimeError('No file format specified')
|
||||
|
||||
# Temporary way to handle NA values
|
||||
#NA_list = ["nan"]
|
||||
|
||||
# Create DMS
|
||||
d = OBIDMS(config['obi']['defaultdms'])
|
||||
|
||||
# Create view
|
||||
view = d.new_view(config['import']['destview'], view_type=view_type)
|
||||
view = d.new_view(config['import']['destview'], view_type=view_type, quality_column=get_quality)
|
||||
|
||||
i = 0
|
||||
for seq in iseq:
|
||||
pb(i)
|
||||
#pb(i)
|
||||
view[i].set_id(seq['id'])
|
||||
view[i].set_definition(seq['definition'])
|
||||
view[i].set_sequence(seq['sequence'])
|
||||
if get_quality :
|
||||
view[i].set_quality(seq['quality'])
|
||||
for tag in seq['tags'] :
|
||||
#print(tag, seq['tags'][tag])
|
||||
#if seq['tags'][tag] not in NA_list :
|
||||
view[i][tag] = seq['tags'][tag]
|
||||
i+=1
|
||||
|
||||
#print(i)
|
||||
print(view.__repr__())
|
||||
|
||||
view.save_and_close()
|
||||
d.close()
|
||||
|
||||
print("Done.")
|
||||
|
@ -71,11 +71,6 @@ cdef class OBIView:
|
||||
|
||||
cdef class OBIView_NUC_SEQS(OBIView):
|
||||
|
||||
cdef OBIDMS_column ids
|
||||
cdef OBIDMS_column sequences
|
||||
cdef OBIDMS_column definitions
|
||||
cdef OBIDMS_column qualities
|
||||
|
||||
cpdef delete_column(self, str column_name)
|
||||
cpdef align(self,
|
||||
OBIView oview,
|
||||
@ -101,7 +96,7 @@ cdef class OBIDMS:
|
||||
cpdef close(self)
|
||||
cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name)
|
||||
cpdef OBIView open_view(self, str view_name)
|
||||
cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*)
|
||||
cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*, bint quality_column=*)
|
||||
cpdef dict read_view_infos(self, str view_name)
|
||||
# cpdef dict read_views(self) TODO
|
||||
|
||||
|
@ -247,7 +247,8 @@ cdef class OBIDMS_column_line :
|
||||
|
||||
cdef class OBIView :
|
||||
|
||||
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""):
|
||||
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments="", bint quality_column=False):
|
||||
# TODO quality_column is only here because it's needed for OBIView_NUC_SEQS views, not clean
|
||||
|
||||
cdef Obiview_p view = NULL
|
||||
cdef int i
|
||||
@ -466,7 +467,7 @@ cdef class OBIView :
|
||||
|
||||
cdef class OBIView_NUC_SEQS(OBIView):
|
||||
|
||||
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""):
|
||||
def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments="", bint quality_column=False):
|
||||
|
||||
cdef Obiview_p view = NULL
|
||||
cdef int i
|
||||
@ -490,11 +491,11 @@ cdef class OBIView_NUC_SEQS(OBIView):
|
||||
if new :
|
||||
if view_to_clone is not None :
|
||||
if type(view_to_clone) == str :
|
||||
view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments))
|
||||
view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments), quality_column)
|
||||
else :
|
||||
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), (<OBIView> view_to_clone).pointer, line_selection_p, str2bytes(comments))
|
||||
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), (<OBIView> view_to_clone).pointer, line_selection_p, str2bytes(comments), quality_column)
|
||||
elif view_to_clone is None :
|
||||
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments))
|
||||
view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments), quality_column)
|
||||
elif not new :
|
||||
if view_name is not None :
|
||||
view = obi_open_view(dms.pointer, str2bytes(view_name))
|
||||
@ -517,11 +518,6 @@ cdef class OBIView_NUC_SEQS(OBIView):
|
||||
subclass = OBIDMS_column.get_subclass_type(column_p)
|
||||
self.columns[col_name] = subclass(self, col_name)
|
||||
|
||||
self.ids = self.columns[bytes2str(ID_COLUMN)]
|
||||
self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)]
|
||||
self.definitions = self.columns[bytes2str(DEFINITION_COLUMN)]
|
||||
self.qualities = self.columns[bytes2str(QUALITY_COLUMN)]
|
||||
|
||||
def __getitem__(self, object item) :
|
||||
if type(item) == str :
|
||||
return (self.columns)[item]
|
||||
@ -690,7 +686,7 @@ cdef class OBIDMS :
|
||||
return view_class(self, view_name)
|
||||
|
||||
|
||||
cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="") :
|
||||
cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="", bint quality_column=False) :
|
||||
|
||||
cdef object view_class
|
||||
|
||||
@ -706,7 +702,7 @@ cdef class OBIDMS :
|
||||
isinstance(view_to_clone, OBIView_NUC_SEQS)) :
|
||||
view_class = OBIView_NUC_SEQS
|
||||
|
||||
return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments)
|
||||
return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments, quality_column=quality_column)
|
||||
|
||||
|
||||
cpdef dict read_view_infos(self, str view_name) :
|
||||
|
@ -61,13 +61,13 @@ cdef extern from "obiview.h" nogil:
|
||||
ctypedef Obiview_t* Obiview_p
|
||||
|
||||
|
||||
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
|
||||
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments, bint quality_column)
|
||||
|
||||
Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
|
||||
|
||||
Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
|
||||
|
||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
|
||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments, bint quality_column)
|
||||
|
||||
Obiview_infos_p obi_view_map_file(OBIDMS_p dms, const char* view_name)
|
||||
|
||||
|
@ -1134,7 +1134,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
|
||||
}
|
||||
|
||||
|
||||
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments)
|
||||
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments, bool quality_column)
|
||||
{
|
||||
Obiview_p view;
|
||||
OBIDMS_column_p associated_nuc_column;
|
||||
@ -1176,6 +1176,8 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
return NULL;
|
||||
}
|
||||
// Adding quality column
|
||||
if (quality_column)
|
||||
{
|
||||
associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN);
|
||||
if (obi_view_add_column(view, QUALITY_COLUMN, -1, QUALITY_COLUMN, OBI_QUAL, 0, 1, QUALITY_COLUMN, "", (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association
|
||||
{
|
||||
@ -1183,23 +1185,32 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add predicate functions of the view type
|
||||
view->nb_predicates = 5; // TODO macro?
|
||||
// TODO macros?
|
||||
|
||||
if (quality_column)
|
||||
view->nb_predicates = 5;
|
||||
else
|
||||
view->nb_predicates = 3;
|
||||
|
||||
view->predicate_functions = malloc((view->nb_predicates) * sizeof(char* (*) (bool)));
|
||||
|
||||
(view->predicate_functions)[0] = view_has_nuc_sequence_column;
|
||||
(view->predicate_functions)[1] = view_has_quality_column;
|
||||
(view->predicate_functions)[2] = view_has_id_column;
|
||||
(view->predicate_functions)[3] = view_has_definition_column;
|
||||
(view->predicate_functions)[1] = view_has_id_column;
|
||||
(view->predicate_functions)[2] = view_has_definition_column;
|
||||
if (quality_column)
|
||||
{
|
||||
(view->predicate_functions)[3] = view_has_quality_column;
|
||||
(view->predicate_functions)[4] = view_check_quality_matches_seq_column;
|
||||
}
|
||||
|
||||
return view;
|
||||
}
|
||||
|
||||
|
||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments)
|
||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments, bool quality_column)
|
||||
{
|
||||
Obiview_p view;
|
||||
Obiview_p view_to_clone;
|
||||
@ -1207,7 +1218,7 @@ Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_
|
||||
view_to_clone = obi_open_view(dms, view_to_clone_name);
|
||||
if (view_to_clone == NULL)
|
||||
return NULL;
|
||||
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments);
|
||||
view = obi_new_view_nuc_seqs(dms, view_name, view_to_clone, line_selection, comments, quality_column);
|
||||
|
||||
obi_close_view(view_to_clone);
|
||||
|
||||
|
@ -183,6 +183,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
|
||||
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
|
||||
* - ID_COLUMN where sequence identifiers are stored
|
||||
* - DEFINITION_COLUMN where sequence definitions are stored
|
||||
* And, if quality_column is True:
|
||||
* - QUALITY_COLUMN where sequence qualities are stored
|
||||
*
|
||||
* @param dms A pointer on the OBIDMS.
|
||||
@ -191,6 +192,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
|
||||
* @param line_selection Eventually a pointer on a list of indexes corresponding to a line selection to use with the view to clone
|
||||
* if there is one. NULL if there is no line selection or no view to clone.
|
||||
* @param comments Eventually, comments to associate with the view. NULL if not.
|
||||
* @param quality_column Whether or not a sequence quality column is associated with the view.
|
||||
*
|
||||
* @returns A pointer to the newly created view structure.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -198,7 +200,7 @@ Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, con
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments);
|
||||
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments, bool quality_column);
|
||||
|
||||
|
||||
/**
|
||||
@ -215,6 +217,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
* - NUC_SEQUENCE_COLUMN where nucleotide sequences are stored
|
||||
* - ID_COLUMN where sequence identifiers are stored
|
||||
* - DEFINITION_COLUMN where sequence definitions are stored
|
||||
* And, if quality_column is True:
|
||||
* - QUALITY_COLUMN where sequence qualities are stored
|
||||
*
|
||||
* @param dms A pointer on the OBIDMS.
|
||||
@ -223,6 +226,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
* @param line_selection Eventually a pointer on a list of indexes corresponding to a line selection to use with the view to clone
|
||||
* if there is one. NULL if there is no line selection or no view to clone.
|
||||
* @param comments Eventually, comments to associate with the view. NULL if not.
|
||||
* @param quality_column Whether or not a sequence quality column is associated with the view.
|
||||
*
|
||||
* @returns A pointer to the newly created view structure.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -230,7 +234,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments);
|
||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments, bool quality_column);
|
||||
|
||||
|
||||
/**
|
||||
|
Reference in New Issue
Block a user