1st version of obi align command and reworked functions that handle column alignment
This commit is contained in:
Celine Mercier
2016-08-10 14:51:02 +02:00
parent 26b8e1f215
commit 2aaa87edcc
8 changed files with 373 additions and 43 deletions

View File

@ -0,0 +1,128 @@
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.obidms._obidms import OBIDMS, OBIView # TODO cimport doesn't work
import time
# One-line description of this command.
__title__ = "Aligns one sequence column with itself or two sequence columns"

# Default values for the configuration entries of this command.
# NOTE(review): how these entries are consumed is not visible in this file;
# confirm against the configuration loader before relying on them.
default_config = {
    'inputview': None,
    'skip': 0,
    'only': None,
    'skiperror': False,
    'moltype': 'nuc',
}
def addOptions(parser):
    """Register the command-line options of the ``obi align`` command.

    Two argument groups are added to ``parser``:

    * 'DMS and view options': ``--default-dms``, ``--input-view`` and
      ``--output-view``, stored under the ``obi:*`` destinations;
    * 'obi align specific options': alignment type, score threshold,
      reference length, normalization and similarity/distance switches,
      stored under the ``align:*`` destinations.

    Args:
        parser: an ``argparse``-style parser exposing
            ``add_argument_group``.

    Returns:
        None. ``parser`` is modified in place.
    """
    # TODO put this common group somewhere else but I don't know where
    group = parser.add_argument_group('DMS and view options')

    group.add_argument('--default-dms', '-d',
                       action="store", dest="obi:defaultdms",
                       metavar='<DMS NAME>',
                       default=None,
                       type=str,
                       help="Name of the default DMS for reading and writing data.")

    group.add_argument('--input-view', '-i',
                       action="store", dest="obi:inputview",
                       metavar='<INPUT VIEW NAME>',
                       default=None,
                       type=str,
                       help="Name of the input view, either raw if the view is in the default DMS,"
                            " or in the form 'dms:view' if it is in another DMS.")

    # TODO eventually 2nd view, or 2nd column?

    group.add_argument('--output-view', '-o',
                       action="store", dest="obi:outputview",
                       metavar='<OUTPUT VIEW NAME>',
                       default=None,
                       type=str,
                       help="Name of the output view, either raw if the view is in the default DMS,"
                            " or in the form 'dms:view' if it is in another DMS.")

    group = parser.add_argument_group('obi align specific options')

    # The help text describes a switch, so the value is optional: a bare
    # '--lcs' selects 'lcs' (const) instead of failing with "expected one
    # argument", while '--lcs <ALIGNMENT TYPE>' is still accepted as before.
    group.add_argument('--lcs', '-C',
                       action="store", dest="align:alitype",
                       nargs='?',
                       const='lcs',
                       metavar='<ALIGNMENT TYPE>',
                       default='lcs',
                       type=str,
                       help="Compute alignment using the LCS method.")

    # '%%' is a literal percent sign: argparse %-formats help strings.
    group.add_argument('--threshold', '-t',
                       action="store", dest="align:threshold",
                       metavar='<THRESHOLD>',
                       default=0.0,
                       type=float,
                       help="Score threshold. If the score is normalized and expressed in similarity (default),"
                            " it is an identity, e.g. 0.95 for an identity of 95%%. If the score is normalized"
                            " and expressed in distance, it is (1.0 - identity), e.g. 0.05 for an identity of 95%%."
                            " If the score is not normalized and expressed in similarity, it is the length of the"
                            " Longest Common Subsequence. If the score is not normalized and expressed in distance,"
                            " it is (reference length - LCS length)."
                            " Only sequence pairs with a similarity above <THRESHOLD> are printed. Default: 0.00"
                            " (no threshold).")

    # Both switches below write to the same destination; when both are
    # given on the command line, the last one wins.
    group.add_argument('--longest_length', '-L',
                       action="store_const", dest="align:reflength",
                       default="ali",
                       const="longest",
                       help="The reference length is the length of the longest sequence."
                            " Default: the reference length is the length of the alignment.")

    group.add_argument('--shortest_length', '-l',
                       action="store_const", dest="align:reflength",
                       default="ali",
                       const="shortest",
                       help="The reference length is the length of the shortest sequence."
                            " Default: the reference length is the length of the alignment.")

    # The two switches below turn a default-True setting off.
    group.add_argument('--raw', '-r',
                       action="store_false", dest="align:normalize",
                       default=True,
                       help="Raw score, not normalized. Default: score is normalized with the reference sequence length.")

    group.add_argument('--distance', '-D',
                       action="store_false", dest="align:similarity",
                       default=True,
                       help="Score is expressed in distance. Default: score is expressed in similarity.")
def run(config):
    """Run the ``obi align`` command.

    Opens the default DMS and the input view named in ``config['obi']``,
    creates the output view when one is requested, then calls the input
    view's ``align`` method and prints a completion message.

    Args:
        config: nested mapping; the entries read here are
            ``config['obi']['defaultdms']``, ``config['obi']['inputview']``
            and ``config['obi']['outputview']``.

    NOTE(review): no ``config['align']`` entry is read here, so the
    alignment options declared in ``addOptions`` are not forwarded to
    ``align`` -- confirm whether that is intended.
    """
    # TODO progress bar, e.g. ProgressBar(1, config, seconde=5)

    obi_config = config['obi']

    # Open DMS
    dms = OBIDMS(obi_config['defaultdms'])

    # Open input view 1
    input_view = dms.open_view(obi_config['inputview'])

    # TODO Open input view 2 if there is one

    # Create output view if necessary
    output_name = obi_config['outputview']
    output_view = None if output_name is None else dms.new_view(output_name)

    # TODO Take other alignment types into account when they'll be implemented

    # Call cython alignment function
    input_view.align(output_view=output_view)

    print("Done.")