Alignpairedend: added alignment using shifting with best k-mer similarity
(low-level layer in C and Cython API)
This commit is contained in:
4
python/obitools3/commands/alignpairedend.pxd
Executable file
4
python/obitools3/commands/alignpairedend.pxd
Executable file
@ -0,0 +1,4 @@
|
||||
#cython: language_level=3
|
||||
|
||||
|
||||
cdef object buildAlignment(object direct, object reverse)
|
@ -2,19 +2,20 @@
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column, Column_line
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t, OBI_QUAL
|
||||
from obitools3.dms.column.column cimport Column
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_QUAL
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
|
||||
from obitools3.libalign._qsrassemble import QSolexaRightReverseAssemble
|
||||
from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequence
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq_Stored, Nuc_Seq
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
|
||||
|
||||
|
||||
import sys
|
||||
import os
|
||||
@ -27,6 +28,7 @@ REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY"
|
||||
__title__="Aligns paired-ended reads"
|
||||
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
@ -41,14 +43,6 @@ def addOptions(parser):
|
||||
type=str,
|
||||
help="URI to the reverse reads if they are in a different view than the forward reads")
|
||||
|
||||
# TODO
|
||||
# group.add_argument('--index-file',
|
||||
# action="store", dest="alignpairedend:index",
|
||||
# metavar="<FILENAME>",
|
||||
# default=None,
|
||||
# type=str,
|
||||
# help="URI to the index reads")
|
||||
|
||||
group.add_argument('--score-min',
|
||||
action="store", dest="alignpairedend:smin",
|
||||
metavar="#.###",
|
||||
@ -56,12 +50,22 @@ def addOptions(parser):
|
||||
type=float,
|
||||
help="Minimum score for keeping alignments")
|
||||
|
||||
group.add_argument('-A', '--true-ali',
|
||||
action="store_true", dest="alignpairedend:trueali",
|
||||
default=False,
|
||||
help="Performs gap free end alignment of sequences instead of using kmers to compute alignments (slower).")
|
||||
|
||||
group.add_argument('-k', '--kmer-size',
|
||||
action="store", dest="alignpairedend:kmersize",
|
||||
metavar="#",
|
||||
default=3,
|
||||
type=int,
|
||||
help="K-mer size for kmer comparisons, between 1 and 4 (not when using -A option; default: 3)")
|
||||
|
||||
# TODO declarations, cdef, imports
|
||||
|
||||
la = QSolexaReverseAssemble()
|
||||
ra = QSolexaRightReverseAssemble()
|
||||
def buildAlignment(object direct, object reverse):
|
||||
cdef object buildAlignment(object direct, object reverse):
|
||||
|
||||
if len(direct)==0 or len(reverse)==0:
|
||||
return None
|
||||
@ -82,9 +86,9 @@ def buildAlignment(object direct, object reverse):
|
||||
ali = rali
|
||||
|
||||
return ali
|
||||
|
||||
|
||||
|
||||
def alignmentIterator(entries):
|
||||
def alignmentIterator(entries, aligner):
|
||||
|
||||
if type(entries) == list:
|
||||
two_views = True
|
||||
@ -94,7 +98,7 @@ def alignmentIterator(entries):
|
||||
else:
|
||||
two_views = False
|
||||
entries_len = len(entries)
|
||||
|
||||
|
||||
for i in range(entries_len):
|
||||
if two_views:
|
||||
seqF = forward[i]
|
||||
@ -104,9 +108,12 @@ def alignmentIterator(entries):
|
||||
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
|
||||
seqF.pop(REVERSE_SEQ_COLUMN_NAME)
|
||||
seqF.pop(REVERSE_QUALITY_COLUMN_NAME)
|
||||
ali = buildAlignment(seqF, seqR)
|
||||
|
||||
ali = aligner(seqF, seqR)
|
||||
|
||||
if ali is None:
|
||||
continue
|
||||
|
||||
yield ali
|
||||
|
||||
|
||||
@ -160,11 +167,6 @@ def run(config):
|
||||
else:
|
||||
entries_len = len(entries)
|
||||
|
||||
# if "index" in config["alignpairedend"]: # TODO
|
||||
# index = open_uri(config["alignpairedend"]["index"])
|
||||
# else:
|
||||
# index = None
|
||||
|
||||
# Open the output
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
input=False,
|
||||
@ -174,22 +176,30 @@ def run(config):
|
||||
|
||||
view = output[1]
|
||||
|
||||
Column.new_column(view, b"QUALITY", OBI_QUAL) #TODO output URI quality option
|
||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL) #TODO output URI quality option?
|
||||
|
||||
if 'smin' in config['alignpairedend']:
|
||||
config['alignpairedend']['sminL'] = config['alignpairedend']['smin']
|
||||
config['alignpairedend']['sminR'] = config['alignpairedend']['smin']
|
||||
sminL = config['alignpairedend']['sminL']
|
||||
sminR = config['alignpairedend']['sminR']
|
||||
smin = config['alignpairedend']['smin']
|
||||
else:
|
||||
sminL = 0
|
||||
sminR = 0
|
||||
smin = 0
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(entries_len, config, seconde=5)
|
||||
|
||||
ba = alignmentIterator(entries)
|
||||
|
||||
if config['alignpairedend']['trueali']:
|
||||
kmer_ali = False
|
||||
aligner = buildAlignment
|
||||
else :
|
||||
kmer_ali = True
|
||||
if type(entries) == list:
|
||||
forward = entries[0]
|
||||
reverse = entries[1]
|
||||
aligner = Kmer_similarity(forward, view2=reverse, kmer_size=config['alignpairedend']['kmersize'])
|
||||
else:
|
||||
aligner = Kmer_similarity(entries, kmer_size=config['alignpairedend']['kmersize'])
|
||||
|
||||
ba = alignmentIterator(entries, aligner)
|
||||
|
||||
i = 0
|
||||
for ali in ba:
|
||||
|
||||
@ -197,31 +207,33 @@ def run(config):
|
||||
|
||||
consensus = view[i]
|
||||
|
||||
if sminL > 0:
|
||||
if ((ali.direction=='left' and ali.score > sminL)
|
||||
or (ali.score > sminR)):
|
||||
buildConsensus(ali, consensus)
|
||||
if not two_views:
|
||||
seqF = entries[i]
|
||||
else:
|
||||
seqF = forward[i]
|
||||
|
||||
if smin > 0:
|
||||
if (ali.score > smin) :
|
||||
buildConsensus(ali, consensus, seqF)
|
||||
else:
|
||||
seqF = ali[0].wrapped
|
||||
if not two_views:
|
||||
seq = entries[i]
|
||||
seqR = Nuc_Seq(seq.id, seq[REVERSE_SEQ_COLUMN_NAME], quality = seq[REVERSE_QUALITY_COLUMN_NAME])
|
||||
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality = seqF[REVERSE_QUALITY_COLUMN_NAME])
|
||||
else:
|
||||
seqR = reverse[i]
|
||||
buildJoinedSequence(ali, seqR, consensus)
|
||||
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
|
||||
|
||||
consensus[b"sminL"] = sminL
|
||||
consensus[b"sminR"] = sminR
|
||||
consensus[b"smin"] = smin
|
||||
else:
|
||||
buildConsensus(ali, consensus)
|
||||
|
||||
# TODO
|
||||
# if "index" in config['alignpairedend'] and config['alignpairedend']['index'] is not None:
|
||||
# idx = str(config['alignpairedend']['index'].next())
|
||||
# consensus[b"illumina_index"] = idx
|
||||
buildConsensus(ali, consensus, seqF)
|
||||
|
||||
if kmer_ali :
|
||||
ali.free()
|
||||
|
||||
i+=1
|
||||
|
||||
if kmer_ali :
|
||||
aligner.free()
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
|
Reference in New Issue
Block a user