# obitools3/python/obitools3/commands/lcs.pyx

#cython: language_level=3
#
# from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
# from obitools3.dms.dms import OBIDMS       # TODO cimport doesn't work
# from obitools3.utils cimport str2bytes
#
# from obitools3.dms.capi.obialign cimport obi_lcs_align_one_column, \
#                                          obi_lcs_align_two_columns
#
#
# import time
#
# __title__="Aligns one sequence column with itself or two sequence columns"
#
#
# default_config = {   'inputview'    : None,
#                  }
#
# def addOptions(parser):
#
#     # TODO put this common group somewhere else but I don't know where.
#     # Also some options should probably be in another group
#     group=parser.add_argument_group('DMS and view options')
#
#     group.add_argument('--default-dms', '-d',
#                        action="store", dest="obi:defaultdms",
#                        metavar='<DMS NAME>',
#                        default=None,
#                        type=str,
#                        help="Name of the default DMS for reading and writing data.")
#
#     group.add_argument('--input-view-1', '-i',
#                        action="store", dest="obi:inputview1",
#                        metavar='<INPUT VIEW NAME>',
#                        default=None,
#                        type=str,
#                        help="Name of the (first) input view.")
#
#     group.add_argument('--input-view-2', '-I',
#                        action="store", dest="obi:inputview2",
#                        metavar='<INPUT VIEW NAME>',
#                        default="",
#                        type=str,
#                        help="Optionally, the name of the second input view.")
#
#     group.add_argument('--input-column-1', '-c',
#                        action="store", dest="obi:inputcolumn1",
#                        metavar='<INPUT COLUMN NAME>',
#                        default="",
#                        type=str,
#                        help="Name of the (first) input column. "
#                             " Default: the default nucleotide sequence column of the view if there is one.")
#
#     group.add_argument('--input-column-2', '-C',
#                        action="store", dest="obi:inputcolumn2",
#                        metavar='<INPUT COLUMN NAME>',
#                        default="",
#                        type=str,
#                        help="Optionally, the name of the second input column.")
#
#     group.add_argument('--input-elt-1', '-e',
#                        action="store", dest="obi:inputelement1",
#                        metavar='<INPUT ELEMENT NAME>',
#                        default="",
#                        type=str,
#                        help="If the first input column has multiple elements per line, name of the element referring to the sequence to align. "
#                             " Default: the first element of the line.")
#
#     group.add_argument('--input-elt-2', '-E',
#                        action="store", dest="obi:inputelement2",
#                        metavar='<INPUT ELEMENT NAME>',
#                        default="",
#                        type=str,
#                        help="If the second input column has multiple elements per line, name of the element referring to the sequence to align. "
#                             " Default: the first element of the line.")
#
#     group.add_argument('--id-column-1', '-f',
#                        action="store", dest="obi:idcolumn1",
#                        metavar='<ID COLUMN NAME>',
#                        default="",
#                        type=str,
#                        help="Name of the (first) column containing the identifiers of the sequences to align. "
#                             " Default: the default ID column of the view if there is one.")
#
#     group.add_argument('--id-column-2', '-F',
#                        action="store", dest="obi:idcolumn2",
#                        metavar='<ID COLUMN NAME>',
#                        default="",
#                        type=str,
#                        help="Optionally, the name of the second ID column.")
#
#     group.add_argument('--output-view', '-o',
#                        action="store", dest="obi:outputview",
#                        metavar='<OUTPUT VIEW NAME>',
#                        default=None,
#                        type=str,
#                        help="Name of the output view.")
#
#
#     group=parser.add_argument_group('obi lcs specific options')
#
#     group.add_argument('--threshold','-t',
#                        action="store", dest="align:threshold",
#                        metavar='<THRESHOLD>',
#                        default=0.0,
#                        type=float,
#                        help="Score threshold. If the score is normalized and expressed in similarity (default),"
#                             " it is an identity, e.g. 0.95 for an identity of 95%%. If the score is normalized"
#                             " and expressed in distance, it is (1.0 - identity), e.g. 0.05 for an identity of 95%%."
#                             " If the score is not normalized and expressed in similarity, it is the length of the"
#                             " Longest Common Subsequence. If the score is not normalized and expressed in distance,"
#                             " it is (reference length - LCS length)."
#                             " Only sequence pairs with a similarity above <THRESHOLD> are printed. Default: 0.00"
#                             " (no threshold).")
#
#     group.add_argument('--longest-length','-L',
#                        action="store_const", dest="align:reflength",
#                        default=0,
#                        const=1,
#                        help="The reference length is the length of the longest sequence."
#                             " Default: the reference length is the length of the alignment.")
#
#     group.add_argument('--shortest-length','-l',
#                        action="store_const", dest="align:reflength",
#                        default=0,
#                        const=2,
#                        help="The reference length is the length of the shortest sequence."
#                             " Default: the reference length is the length of the alignment.")
#
#     group.add_argument('--raw','-r',
#                        action="store_false", dest="align:normalize",
#                        default=True,
#                        help="Raw score, not normalized. Default: score is normalized with the reference sequence length.")
#
#     group.add_argument('--distance','-D',
#                        action="store_false", dest="align:similarity",
#                        default=True,
#                        help="Score is expressed in distance. Default: score is expressed in similarity.")
#
#     group.add_argument('--print-seq','-s',
#                        action="store_true", dest="align:printseq",
#                        default=False,
#                        help="The nucleotide sequences are written in the output view. Default: they are not written.")
#
#     group.add_argument('--print-count','-n',
#                        action="store_true", dest="align:printcount",
#                        default=False,
#                        help="Sequence counts are written in the output view. Default: they are not written.")
#
#     group.add_argument('--thread-count','-p',   # TODO should probably be in a specific option group
#                        action="store", dest="align:threadcount",
#                        metavar='<THREAD COUNT>',
#                        default=1,
#                        type=int,
#                        help="Number of threads to use for the computation. Default: one.")
#
#
# # cpdef align(str dms_n,
# #             str input_view_1_n, str output_view_n,
# #             str input_view_2_n="",
# #             str input_column_1_n="", str input_column_2_n="",
# #             str input_elt_1_n="", str input_elt_2_n="",
# #             str id_column_1_n="", str id_column_2_n="",
# #             double threshold=0.0, bint normalize=True,
# #             int reference=0, bint similarity_mode=True,
# #             bint print_seq=False, bint print_count=False,
# #             comments="",
# #             int thread_count=1) :
# #
# #     cdef OBIDMS d
# #     d = OBIDMS(dms_n)
# #
# #     if input_view_2_n == "" and input_column_2_n == "" :
# #         if obi_lcs_align_one_column(d._pointer, \
# #                                     str2bytes(input_view_1_n), \
# #                                     str2bytes(input_column_1_n), \
# #                                     str2bytes(input_elt_1_n), \
# #                                     str2bytes(id_column_1_n), \
# #                                     str2bytes(output_view_n), \
# #                                     str2bytes(comments), \
# #                                     print_seq, \
# #                                     print_count, \
# #                                     threshold, normalize, reference, similarity_mode,
# #                                     thread_count) < 0 :
# #             raise Exception("Error aligning sequences")
# #     else :
# #         if obi_lcs_align_two_columns(d._pointer, \
# #                                      str2bytes(input_view_1_n), \
# #                                      str2bytes(input_view_2_n), \
# #                                      str2bytes(input_column_1_n), \
# #                                      str2bytes(input_column_2_n), \
# #                                      str2bytes(input_elt_1_n), \
# #                                      str2bytes(input_elt_2_n), \
# #                                      str2bytes(id_column_1_n), \
# #                                      str2bytes(id_column_2_n), \
# #                                      str2bytes(output_view_n), \
# #                                      str2bytes(comments), \
# #                                      print_seq, \
# #                                      print_count, \
# #                                      threshold, normalize, reference, similarity_mode) < 0 :
# #             raise Exception("Error aligning sequences")
# #
# #     d.close()
# #
# #
def run(config):
    """Entry point of the ``lcs`` command; currently a no-op stub.

    The alignment call that would normally be made here is commented out
    in the lines following this function, so ``config`` is accepted but
    never read and the function always returns ``None``.
    """
    # TODO: Build formatted comments with all parameters etc
    return None
#     comments = "Obi align"
#
#     # Call cython alignment function
#     align(config['obi']['defaultdms'],  \
#           config['obi']['inputview1'],  \
#           config['obi']['outputview'],  \
#           input_view_2_n   = config['obi']['inputview2'],  \
#           input_column_1_n = config['obi']['inputcolumn1'],  \
#           input_column_2_n = config['obi']['inputcolumn2'], \
#           input_elt_1_n    = config['obi']['inputelement1'],  \
#           input_elt_2_n    = config['obi']['inputelement2'], \
#           id_column_1_n    = config['obi']['idcolumn1'],  \
#           id_column_2_n    = config['obi']['idcolumn2'], \
#           threshold        = config['align']['threshold'], \
#           normalize        = config['align']['normalize'],  \
#           reference        = config['align']['reflength'],  \
#           similarity_mode  = config['align']['similarity'],  \
#           print_seq        = config['align']['printseq'],  \
#           print_count      = config['align']['printcount'], \
#           comments         = comments, \
#           thread_count     = config['align']['threadcount'])
#
#     print("Done.")
# #
# #
# #
# #
# #