First version of the obi align command, and rework of the functions that handle
column alignment
This commit is contained in:
128
python/obitools3/commands/align.pyx
Normal file
128
python/obitools3/commands/align.pyx
Normal file
@ -0,0 +1,128 @@
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.obidms._obidms import OBIDMS, OBIView # TODO cimport doesn't work
|
||||
|
||||
|
||||
import time
|
||||
|
||||
# One-line description of the command.
__title__ = "Aligns one sequence column with itself or two sequence columns"


# Module-level default option values.
# NOTE(review): presumably consumed by the command framework; how they are
# merged with the parsed options is not visible from this file.
default_config = dict(
    inputview=None,
    skip=0,
    only=None,
    skiperror=False,
    moltype='nuc',
)
|
||||
|
||||
def addOptions(parser):
    """Register the command-line options of the ``obi align`` command.

    Two argument groups are added to *parser*:

    * 'DMS and view options': default DMS, input view and output view
      (stored under the ``obi:...`` destinations);
    * 'obi align specific options': alignment type, score threshold,
      reference length, normalization and similarity/distance mode
      (stored under the ``align:...`` destinations).

    :param parser: an ``argparse`` parser (or any object exposing
                   ``add_argument_group``).
    """

    # TODO put this common group somewhere else but I don't know where
    group = parser.add_argument_group('DMS and view options')

    group.add_argument('--default-dms', '-d',
                       action="store", dest="obi:defaultdms",
                       metavar='<DMS NAME>',
                       default=None,
                       type=str,
                       help="Name of the default DMS for reading and writing data.")

    group.add_argument('--input-view', '-i',
                       action="store", dest="obi:inputview",
                       metavar='<INPUT VIEW NAME>',
                       default=None,
                       type=str,
                       help="Name of the input view, either raw if the view is in the default DMS,"
                            " or in the form 'dms:view' if it is in another DMS.")

    # TODO eventually 2nd view, or 2nd column?

    group.add_argument('--output-view', '-o',
                       action="store", dest="obi:outputview",
                       metavar='<OUTPUT VIEW NAME>',
                       default=None,
                       type=str,
                       help="Name of the output view, either raw if the view is in the default DMS,"
                            " or in the form 'dms:view' if it is in another DMS.")

    group = parser.add_argument_group('obi align specific options')

    # '--lcs' is documented as a flag selecting the LCS method, so it must be
    # usable without a value. nargs='?' + const keeps the former
    # '--lcs <ALIGNMENT TYPE>' spelling working as well.
    group.add_argument('--lcs', '-C',
                       action="store", dest="align:alitype",
                       nargs='?',
                       const='lcs',
                       metavar='<ALIGNMENT TYPE>',
                       default='lcs',
                       type=str,
                       help="Compute alignment using the LCS method.")

    # '%%' is required because argparse %-formats help strings.
    group.add_argument('--threshold', '-t',
                       action="store", dest="align:threshold",
                       metavar='<THRESHOLD>',
                       default=0.0,
                       type=float,
                       help="Score threshold. If the score is normalized and expressed in similarity (default),"
                            " it is an identity, e.g. 0.95 for an identity of 95%%. If the score is normalized"
                            " and expressed in distance, it is (1.0 - identity), e.g. 0.05 for an identity of 95%%."
                            " If the score is not normalized and expressed in similarity, it is the length of the"
                            " Longest Common Subsequence. If the score is not normalized and expressed in distance,"
                            " it is (reference length - LCS length)."
                            " Only sequence pairs with a similarity above <THRESHOLD> are printed. Default: 0.00"
                            " (no threshold).")

    # Both reference-length flags write to the same destination; when
    # neither is given the value stays "ali" (alignment length).
    group.add_argument('--longest_length', '-L',
                       action="store_const", dest="align:reflength",
                       default="ali",
                       const="longest",
                       help="The reference length is the length of the longest sequence."
                            " Default: the reference length is the length of the alignment.")

    group.add_argument('--shortest_length', '-l',
                       action="store_const", dest="align:reflength",
                       default="ali",
                       const="shortest",
                       help="The reference length is the length of the shortest sequence."
                            " Default: the reference length is the length of the alignment.")

    group.add_argument('--raw', '-r',
                       action="store_false", dest="align:normalize",
                       default=True,
                       help="Raw score, not normalized. Default: score is normalized with the reference sequence length.")

    group.add_argument('--distance', '-D',
                       action="store_false", dest="align:similarity",
                       default=True,
                       help="Score is expressed in distance. Default: score is expressed in similarity.")
|
||||
|
||||
|
||||
|
||||
def run(config):
    """Entry point of the ``obi align`` command.

    Opens the default DMS and the input view named in ``config['obi']``,
    creates the output view when one is requested, and aligns the input
    view with itself through ``iview.align``.

    The score options parsed by ``addOptions`` (threshold, normalization,
    similarity/distance mode) are forwarded to ``align``; previously they
    were parsed but ignored.

    :param config: nested configuration mapping; this function reads the
                   'obi' section (defaultdms, inputview, outputview) and
                   the 'align' section (threshold, normalize, similarity).
    """

    # TODO report progress with ProgressBar(1, config, seconde=5)

    obi_options = config['obi']
    align_options = config['align']

    # Open DMS
    d = OBIDMS(obi_options['defaultdms'])

    # Open input view 1
    iview = d.open_view(obi_options['inputview'])

    # TODO Open input view 2 if there is one

    # Create output view if necessary; when none is requested, align()
    # is handed None and picks its own output view.
    output_view_name = obi_options['outputview']
    if output_view_name is not None :
        oview = d.new_view(output_view_name)
    else :
        oview = None

    # TODO Take other alignment types into account when they'll be implemented

    # TODO forward config['align']['reflength'] ("ali", "longest", "shortest")
    # once the integer codes expected by the `reference` parameter are known;
    # they are defined on the C side and not visible from this module.

    # Call cython alignment function
    iview.align(output_view=oview,
                threshold=align_options['threshold'],
                normalize=align_options['normalize'],
                similarity_mode=align_options['similarity'])

    print("Done.")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -77,7 +77,15 @@ cdef class OBIView_NUC_SEQS(OBIView):
|
||||
cdef OBIDMS_column qualities
|
||||
|
||||
cpdef delete_column(self, str column_name)
|
||||
|
||||
# Declaration of the alignment method; `=*` marks optional arguments whose
# default values are given in the implementation file.
cpdef align(self, OBIView iview2=*, object output_view=*,
            double threshold=*, bint normalize=*,
            int reference=*, bint similarity_mode=*)
|
||||
|
||||
|
||||
cdef class OBIView_line :
|
||||
|
||||
|
@ -10,7 +10,9 @@ from .capi.obidmscolumn cimport obi_close_column, \
|
||||
OBIDMS_column_header_p
|
||||
|
||||
from .capi.obiutils cimport obi_format_date
|
||||
|
||||
|
||||
from .capi.obialign cimport obi_align_one_column
|
||||
|
||||
from .capi.obitypes cimport const_char_p, \
|
||||
OBIType_t, \
|
||||
OBI_INT, \
|
||||
@ -454,6 +456,7 @@ cdef class OBIView :
|
||||
for line in self.__iter__() :
|
||||
to_print = to_print + str(line) + "\n"
|
||||
return to_print
|
||||
|
||||
|
||||
#############################################
|
||||
|
||||
@ -524,7 +527,76 @@ cdef class OBIView_NUC_SEQS(OBIView):
|
||||
def __setitem__(self, index_t line_idx, OBI_Nuc_Seq sequence_obj) :
    """Copy every key/value pair of *sequence_obj* into line *line_idx* of the view."""
    for tag in sequence_obj :
        value = sequence_obj[tag]
        # The line is looked up again for each key, as in the original code.
        self[line_idx][tag] = value
|
||||
|
||||
|
||||
|
||||
# TODO
|
||||
cpdef align(self, OBIView iview2=None, object output_view=None,
            double threshold=0.0, bint normalize=True, int reference=0, bint similarity_mode=True) :
    """Align the nucleotide sequence column of this view with itself.

    The sequences of the NUC_SEQUENCE_COLUMN column are handed to the C
    function ``obi_align_one_column``, which fills three columns of the
    output view: "ID1" and "ID2" (OBI_STR) and "score" (OBI_FLOAT).

    :param iview2: second input view. Not supported yet: anything other
                   than None raises NotImplementedError instead of being
                   silently ignored.
    :param output_view: None (a view named "alignment_score_view" is
                        created in this view's DMS), a view name (a view
                        with that name is created), or an already opened
                        view object, which is used as is.
    :param threshold: passed unchanged to ``obi_align_one_column``.
    :param normalize: passed unchanged to ``obi_align_one_column``.
    :param reference: passed unchanged to ``obi_align_one_column``.
    :param similarity_mode: passed unchanged to ``obi_align_one_column``.
    :raises NotImplementedError: if *iview2* is given.
    :raises Exception: if ``obi_align_one_column`` returns a negative value.
    """

    cdef OBIView oview

    cdef OBIDMS_column icol1
    cdef OBIDMS_column id1_col
    cdef OBIDMS_column id2_col
    cdef OBIDMS_column ocol

    cdef Obiview_p iview1_p
    cdef Obiview_p oview_p

    cdef OBIDMS_column_p icol1_p
    cdef OBIDMS_column_p id1_col_p
    cdef OBIDMS_column_p id2_col_p
    cdef OBIDMS_column_p ocol_p

    cdef str id1_col_name
    cdef str id2_col_name
    cdef str score_col_name

    # TODO align two views; until then, refuse instead of silently
    # aligning this view with itself.
    if iview2 is not None :
        raise NotImplementedError("Aligning two views is not implemented yet")

    id1_col_name = "ID1"  # TODO discuss names, aliases
    id2_col_name = "ID2"
    score_col_name = "score"

    # Input: the sequence column of this view. Column objects hold a
    # pointer to the C column pointer, hence the [0] dereference.
    iview1_p = self.pointer
    icol1 = self[bytes2str(NUC_SEQUENCE_COLUMN)]
    icol1_p = (icol1.pointer)[0]

    # Create the output view if needed
    if output_view is None :
        oview = self.dms.new_view("alignment_score_view") # TODO discuss
    elif isinstance(output_view, str) :
        oview = self.dms.new_view(output_view)
    else :
        oview = output_view

    # Output columns: the two sequence identifiers and the score.
    oview.add_column(id1_col_name, type='OBI_STR', create=True)
    oview.add_column(id2_col_name, type='OBI_STR', create=True)
    oview.add_column(score_col_name, type='OBI_FLOAT', create=True)

    oview_p = oview.pointer

    ocol = oview[score_col_name]
    id1_col = oview[id1_col_name]
    id2_col = oview[id2_col_name]

    ocol_p = (ocol.pointer)[0]
    id1_col_p = (id1_col.pointer)[0]
    id2_col_p = (id2_col.pointer)[0]

    if obi_align_one_column(iview1_p, icol1_p, oview_p, id1_col_p, id2_col_p, ocol_p, threshold, normalize, reference, similarity_mode) < 0 :
        raise Exception("Error aligning sequences")
|
||||
|
||||
|
||||
#############################################
|
||||
|
||||
|
@ -8,17 +8,6 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
|
||||
# TO DISCUSS :
|
||||
# I am not sure that this method has to be declared here
|
||||
# Alignment must be declared outside of the sequence object
|
||||
# NOTE(review): column-level declaration of align(); `= *` marks optional
# arguments whose defaults are given in the implementation.
cpdef align(self,
|
||||
OBIView score_view,
|
||||
OBIDMS_column score_column,
|
||||
double threshold = *,
|
||||
bint normalize = *,
|
||||
int reference = *,
|
||||
bint similarity_mode = *)
|
||||
|
||||
|
||||
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
||||
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||
|
@ -37,18 +37,6 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
|
||||
else :
|
||||
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, str2bytes(value)) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
# TODO choose alignment type (lcs or other) with supplementary argument
|
||||
cpdef align(self,
|
||||
OBIView score_view,
|
||||
OBIDMS_column score_column,
|
||||
double threshold = 0.0,
|
||||
bint normalize = True,
|
||||
int reference = 0, # TODO
|
||||
bint similarity_mode = True):
|
||||
# Aligns this sequence column through obi_align_one_column, writing into
# score_column of score_view; a negative return value raises an Exception.
# NOTE(review): this call passes 8 arguments (no ID columns) -- confirm it
# still matches the obi_align_one_column prototype in use.
if (obi_align_one_column(self.view.pointer, (self.pointer)[0], score_view.pointer, (score_column.pointer)[0], threshold, normalize, reference, similarity_mode) < 0) :
|
||||
raise Exception("An error occurred while aligning sequences")
|
||||
|
||||
|
||||
|
||||
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
||||
|
@ -6,5 +6,14 @@ from ..capi.obidmscolumn cimport OBIDMS_column_p
|
||||
|
||||
cdef extern from "obi_align.h" nogil:

    # C alignment routine for one sequence column.
    # Only the 10-parameter prototype is kept: it is the form that takes the
    # two identifier columns in addition to the score column. The stale
    # 8-parameter prototype that preceded it declared the same C function
    # with a conflicting signature and has been dropped.
    # Callers in this code base treat a negative return value as a failure.
    int obi_align_one_column(Obiview_p seq_view,
                             OBIDMS_column_p seq_column,
                             Obiview_p score_view,
                             OBIDMS_column_p id1_column,
                             OBIDMS_column_p id2_column,
                             OBIDMS_column_p score_column,
                             double threshold,
                             bint normalize,
                             int reference,
                             bint similarity_mode)
|
||||
|
||||
|
Reference in New Issue
Block a user