Files
obitools3/python/obitools3/commands/alignpairedend.pyx
2018-10-21 17:35:18 +02:00

238 lines
7.1 KiB
Cython
Executable File

#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View
from obitools3.dms.view import RollbackException
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column, Column_line
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t, OBI_QUAL
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.align._qsassemble import QSolexaReverseAssemble
from obitools3.align._qsrassemble import QSolexaRightReverseAssemble
from obitools3.align._solexapairend import buildConsensus, buildJoinedSequence
from obitools3.dms.obiseq cimport Nuc_Seq_Stored, Nuc_Seq
import sys
import os
REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE"
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY"
__title__="Aligns paired-ended reads"
def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
group = parser.add_argument_group('obi alignpairedend specific options')
group.add_argument('-R', '--reverse-reads',
action="store", dest="alignpairedend:reverse",
metavar="<URI>",
default=None,
type=str,
help="URI to the reverse reads if they are in a different view than the forward reads")
# TODO
# group.add_argument('--index-file',
# action="store", dest="alignpairedend:index",
# metavar="<FILENAME>",
# default=None,
# type=str,
# help="URI to the index reads")
group.add_argument('--score-min',
action="store", dest="alignpairedend:smin",
metavar="#.###",
default=None,
type=float,
help="Minimum score for keeping alignments")
# TODO declarations, cdef, imports
la = QSolexaReverseAssemble()
ra = QSolexaRightReverseAssemble()
def buildAlignment(object direct, object reverse):
if len(direct)==0 or len(reverse)==0:
return None
la.seqA = direct
la.seqB = reverse
ali=la()
ali.direction='left'
ra.seqA = direct
ra.seqB = reverse
rali=ra()
rali.direction='right'
if ali.score < rali.score:
ali = rali
return ali
def alignmentIterator(entries):
if type(entries) == list:
two_views = True
forward = entries[0]
reverse = entries[1]
entries_len = len(forward)
else:
two_views = False
entries_len = len(entries)
for i in range(entries_len):
if two_views:
seqF = forward[i]
seqR = reverse[i]
else:
seqF = Nuc_Seq.new_from_stored(entries[i])
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
seqF.pop(REVERSE_SEQ_COLUMN_NAME)
seqF.pop(REVERSE_QUALITY_COLUMN_NAME)
ali = buildAlignment(seqF, seqR)
if ali is None:
continue
yield ali
def run(config):
DMS.obi_atexit()
logger("info", "obi alignpairedend")
# Open the input
two_views = False
forward = None
reverse = None
input = None
input = open_uri(config['obi']['inputURI'])
if input is None:
raise Exception("Could not open input reads")
if input[2] != View_NUC_SEQS:
raise NotImplementedError('obi alignpairedend only works on NUC_SEQS views')
if "reverse" in config["alignpairedend"]:
two_views = True
forward = input[1]
rinput = open_uri(config["alignpairedend"]["reverse"])
if rinput is None:
raise Exception("Could not open reverse reads")
if rinput[2] != View_NUC_SEQS:
raise NotImplementedError('obi alignpairedend only works on NUC_SEQS views')
reverse = rinput[1]
if len(forward) != len(reverse):
raise Exception("Error: the number of forward and reverse reads are different")
entries = [forward, reverse]
input_dms_name = [forward.dms.name, reverse.dms.name]
input_view_name = [forward.name, reverse.name]
else:
entries = input[1]
input_dms_name = [entries.dms.name]
input_view_name = [entries.name]
if two_views:
entries_len = len(forward)
else:
entries_len = len(entries)
# if "index" in config["alignpairedend"]: # TODO
# index = open_uri(config["alignpairedend"]["index"])
# else:
# index = None
# Open the output
output = open_uri(config['obi']['outputURI'],
input=False,
newviewtype=View_NUC_SEQS)
if output is None:
raise Exception("Could not create output view")
view = output[1]
Column.new_column(view, b"QUALITY", OBI_QUAL) #TODO output URI quality option
if 'smin' in config['alignpairedend']:
config['alignpairedend']['sminL'] = config['alignpairedend']['smin']
config['alignpairedend']['sminR'] = config['alignpairedend']['smin']
sminL = config['alignpairedend']['sminL']
sminR = config['alignpairedend']['sminR']
else:
sminL = 0
sminR = 0
# Initialize the progress bar
pb = ProgressBar(entries_len, config, seconde=5)
ba = alignmentIterator(entries)
i = 0
for ali in ba:
pb(i)
consensus = view[i]
if sminL > 0:
if ((ali.direction=='left' and ali.score > sminL)
or (ali.score > sminR)):
buildConsensus(ali, consensus)
else:
seqF = ali[0].wrapped
if not two_views:
seq = entries[i]
seqR = Nuc_Seq(seq.id, seq[REVERSE_SEQ_COLUMN_NAME], quality = seq[REVERSE_QUALITY_COLUMN_NAME])
else:
seqR = reverse[i]
buildJoinedSequence(ali, seqR, consensus)
consensus[b"sminL"] = sminL
consensus[b"sminR"] = sminR
else:
buildConsensus(ali, consensus)
# TODO
# if "index" in config['alignpairedend'] and config['alignpairedend']['index'] is not None:
# idx = str(config['alignpairedend']['index'].next())
# consensus[b"illumina_index"] = idx
i+=1
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
output[0].record_command_line(command_line)
print("\n")
print(repr(view))
input[0].close()
if two_views:
rinput[0].close()
output[0].close()