Commands: ngsfilter and alignpairedend can now be used in whichever
order
This commit is contained in:
@ -8,13 +8,17 @@ from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
|||||||
from obitools3.dms.column.column cimport Column, Column_line
|
from obitools3.dms.column.column cimport Column, Column_line
|
||||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
|
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
|
||||||
from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t, OBI_QUAL
|
from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t, OBI_QUAL
|
||||||
from obitools3.apps.optiongroups import addMinimalOutputOption
|
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.align._qsassemble import QSolexaReverseAssemble
|
from obitools3.align._qsassemble import QSolexaReverseAssemble
|
||||||
from obitools3.align._qsrassemble import QSolexaRightReverseAssemble
|
from obitools3.align._qsrassemble import QSolexaRightReverseAssemble
|
||||||
from obitools3.align._solexapairend import buildConsensus, buildJoinedSequence
|
from obitools3.align._solexapairend import buildConsensus, buildJoinedSequence
|
||||||
from obitools3.dms.obiseq cimport Nuc_Seq_Stored
|
from obitools3.dms.obiseq cimport Nuc_Seq_Stored, Nuc_Seq
|
||||||
|
|
||||||
|
|
||||||
|
REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE"
|
||||||
|
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY"
|
||||||
|
|
||||||
|
|
||||||
__title__="Aligns paired-ended reads"
|
__title__="Aligns paired-ended reads"
|
||||||
@ -22,23 +26,17 @@ __title__="Aligns paired-ended reads"
|
|||||||
|
|
||||||
def addOptions(parser):
|
def addOptions(parser):
|
||||||
|
|
||||||
|
addSequenceInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
|
||||||
group = parser.add_argument_group('obi alignpairedend specific options')
|
group = parser.add_argument_group('obi alignpairedend specific options')
|
||||||
|
|
||||||
group.add_argument('-f', '--forward-reads',
|
|
||||||
action="store", dest="alignpairedend:forward",
|
|
||||||
metavar="<URI>",
|
|
||||||
default=None,
|
|
||||||
type=str,
|
|
||||||
help="URI to the forward reads")
|
|
||||||
|
|
||||||
group.add_argument('-r', '--reverse-reads',
|
group.add_argument('-R', '--reverse-reads',
|
||||||
action="store", dest="alignpairedend:reverse",
|
action="store", dest="alignpairedend:reverse",
|
||||||
metavar="<URI>",
|
metavar="<URI>",
|
||||||
default=None,
|
default=None,
|
||||||
type=str,
|
type=str,
|
||||||
help="URI to the reverse reads")
|
help="URI to the reverse reads if they are in a different view than the forward reads")
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
# group.add_argument('--index-file',
|
# group.add_argument('--index-file',
|
||||||
@ -60,7 +58,7 @@ def addOptions(parser):
|
|||||||
|
|
||||||
la = QSolexaReverseAssemble()
|
la = QSolexaReverseAssemble()
|
||||||
ra = QSolexaRightReverseAssemble()
|
ra = QSolexaRightReverseAssemble()
|
||||||
def buildAlignment(Nuc_Seq_Stored direct, Nuc_Seq_Stored reverse):
|
def buildAlignment(object direct, object reverse):
|
||||||
|
|
||||||
if len(direct)==0 or len(reverse)==0:
|
if len(direct)==0 or len(reverse)==0:
|
||||||
return None
|
return None
|
||||||
@ -83,9 +81,27 @@ def buildAlignment(Nuc_Seq_Stored direct, Nuc_Seq_Stored reverse):
|
|||||||
return ali
|
return ali
|
||||||
|
|
||||||
|
|
||||||
def alignmentIterator(View_NUC_SEQS forward, View_NUC_SEQS reverse):
|
def alignmentIterator(entries):
|
||||||
for i in range(len(forward)):
|
|
||||||
ali = buildAlignment(forward[i], reverse[i])
|
if type(entries) == list:
|
||||||
|
two_views = True
|
||||||
|
forward = entries[0]
|
||||||
|
reverse = entries[1]
|
||||||
|
entries_len = len(forward)
|
||||||
|
else:
|
||||||
|
two_views = False
|
||||||
|
entries_len = len(entries)
|
||||||
|
|
||||||
|
for i in range(entries_len):
|
||||||
|
if two_views:
|
||||||
|
seqF = forward[i]
|
||||||
|
seqR = reverse[i]
|
||||||
|
else:
|
||||||
|
seqF = Nuc_Seq.new_from_stored(entries[i])
|
||||||
|
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
|
||||||
|
seqF.pop(REVERSE_SEQ_COLUMN_NAME)
|
||||||
|
seqF.pop(REVERSE_QUALITY_COLUMN_NAME)
|
||||||
|
ali = buildAlignment(seqF, seqR)
|
||||||
if ali is None:
|
if ali is None:
|
||||||
continue
|
continue
|
||||||
yield ali
|
yield ali
|
||||||
@ -98,22 +114,49 @@ def run(config):
|
|||||||
logger("info", "obi alignpairedend")
|
logger("info", "obi alignpairedend")
|
||||||
|
|
||||||
# Open the input
|
# Open the input
|
||||||
finput = open_uri(config["alignpairedend"]["forward"])
|
|
||||||
if finput is None:
|
|
||||||
raise Exception("Could not open forward reads")
|
|
||||||
forward = finput[1]
|
|
||||||
rinput = open_uri(config["alignpairedend"]["reverse"])
|
|
||||||
if rinput is None:
|
|
||||||
raise Exception("Could not open reverse reads")
|
|
||||||
reverse = rinput[1]
|
|
||||||
|
|
||||||
if len(forward) != len(reverse):
|
|
||||||
raise Exception("Error: the number of forward and reverse reads are different")
|
|
||||||
|
|
||||||
if "index" in config["alignpairedend"]: # TODO
|
two_views = False
|
||||||
index = open_uri(config["alignpairedend"]["index"])
|
forward = None
|
||||||
|
reverse = None
|
||||||
|
input = None
|
||||||
|
|
||||||
|
input = open_uri(config['obi']['inputURI'])
|
||||||
|
if input is None:
|
||||||
|
raise Exception("Could not open input reads")
|
||||||
|
if input[2] != View_NUC_SEQS:
|
||||||
|
raise NotImplementedError('obi alignpairedend only works on NUC_SEQS views')
|
||||||
|
|
||||||
|
if "reverse" in config["alignpairedend"]:
|
||||||
|
|
||||||
|
two_views = True
|
||||||
|
|
||||||
|
forward = input[1]
|
||||||
|
|
||||||
|
rinput = open_uri(config["alignpairedend"]["reverse"])
|
||||||
|
if rinput is None:
|
||||||
|
raise Exception("Could not open reverse reads")
|
||||||
|
if rinput[2] != View_NUC_SEQS:
|
||||||
|
raise NotImplementedError('obi alignpairedend only works on NUC_SEQS views')
|
||||||
|
|
||||||
|
reverse = rinput[1]
|
||||||
|
|
||||||
|
if len(forward) != len(reverse):
|
||||||
|
raise Exception("Error: the number of forward and reverse reads are different")
|
||||||
|
|
||||||
|
entries = [forward, reverse]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
index = None
|
entries = input[1]
|
||||||
|
|
||||||
|
if two_views:
|
||||||
|
entries_len = len(forward)
|
||||||
|
else:
|
||||||
|
entries_len = len(entries)
|
||||||
|
|
||||||
|
# if "index" in config["alignpairedend"]: # TODO
|
||||||
|
# index = open_uri(config["alignpairedend"]["index"])
|
||||||
|
# else:
|
||||||
|
# index = None
|
||||||
|
|
||||||
# Open the output
|
# Open the output
|
||||||
output = open_uri(config['obi']['outputURI'],
|
output = open_uri(config['obi']['outputURI'],
|
||||||
@ -135,9 +178,9 @@ def run(config):
|
|||||||
sminR = 0
|
sminR = 0
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(forward), config, seconde=5)
|
pb = ProgressBar(entries_len, config, seconde=5)
|
||||||
|
|
||||||
ba = alignmentIterator(forward, reverse)
|
ba = alignmentIterator(entries)
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
for ali in ba:
|
for ali in ba:
|
||||||
@ -151,7 +194,13 @@ def run(config):
|
|||||||
or (ali.score > sminR)):
|
or (ali.score > sminR)):
|
||||||
buildConsensus(ali, consensus)
|
buildConsensus(ali, consensus)
|
||||||
else:
|
else:
|
||||||
buildJoinedSequence(ali, reverse[i], consensus)
|
seqF = ali[0].wrapped
|
||||||
|
if not two_views:
|
||||||
|
seq = entries[i]
|
||||||
|
seqR = Nuc_Seq(seq.id, seq[REVERSE_SEQ_COLUMN_NAME], quality = seq[REVERSE_QUALITY_COLUMN_NAME])
|
||||||
|
else:
|
||||||
|
seqR = reverse[i]
|
||||||
|
buildJoinedSequence(ali, seqR, consensus)
|
||||||
|
|
||||||
consensus[b"sminL"] = sminL
|
consensus[b"sminL"] = sminL
|
||||||
consensus[b"sminR"] = sminR
|
consensus[b"sminR"] = sminR
|
||||||
@ -165,7 +214,11 @@ def run(config):
|
|||||||
|
|
||||||
i+=1
|
i+=1
|
||||||
|
|
||||||
finput[0].close()
|
print("\n")
|
||||||
rinput[0].close()
|
print(repr(view))
|
||||||
|
|
||||||
|
input[0].close()
|
||||||
|
if two_views:
|
||||||
|
rinput[0].close()
|
||||||
output[0].close()
|
output[0].close()
|
||||||
|
|
||||||
|
@ -10,10 +10,16 @@ from obitools3.uri.decode import open_uri
|
|||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.align._freeendgapfm import FreeEndGapFullMatch
|
from obitools3.align._freeendgapfm import FreeEndGapFullMatch
|
||||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
|
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||||
|
|
||||||
from functools import reduce, cmp_to_key
|
from functools import reduce, cmp_to_key
|
||||||
import math
|
import math
|
||||||
|
|
||||||
|
|
||||||
|
REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE"
|
||||||
|
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY"
|
||||||
|
|
||||||
|
|
||||||
__title__="Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers"
|
__title__="Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers"
|
||||||
|
|
||||||
|
|
||||||
@ -31,6 +37,20 @@ def addOptions(parser):
|
|||||||
default=None,
|
default=None,
|
||||||
help="URI to the view containing the samples definition (with tags, primers, sample names,...)")
|
help="URI to the view containing the samples definition (with tags, primers, sample names,...)")
|
||||||
|
|
||||||
|
# group.add_argument('-F', '--forward-reads',
|
||||||
|
# action="store", dest="ngsfilter:forward",
|
||||||
|
# metavar="<URI>",
|
||||||
|
# default=None,
|
||||||
|
# type=str,
|
||||||
|
# help="URI to the forward reads if the paired-end reads haven't been aligned yet")
|
||||||
|
|
||||||
|
group.add_argument('-R', '--reverse-reads',
|
||||||
|
action="store", dest="ngsfilter:reverse",
|
||||||
|
metavar="<URI>",
|
||||||
|
default=None,
|
||||||
|
type=str,
|
||||||
|
help="URI to the reverse reads if the paired-end reads haven't been aligned yet")
|
||||||
|
|
||||||
group.add_argument('-u','--unidentified',
|
group.add_argument('-u','--unidentified',
|
||||||
action="store", dest="ngsfilter:unidentified",
|
action="store", dest="ngsfilter:unidentified",
|
||||||
metavar="<URI>",
|
metavar="<URI>",
|
||||||
@ -66,7 +86,7 @@ class Primer:
|
|||||||
Primer.collection[sequence]=taglength
|
Primer.collection[sequence]=taglength
|
||||||
|
|
||||||
self.raw=sequence
|
self.raw=sequence
|
||||||
self.sequence = Nuc_Seq(b"primer", sequence)
|
self.sequence = Nuc_Seq(b"primer", sequence)
|
||||||
self.lseq = len(self.sequence)
|
self.lseq = len(self.sequence)
|
||||||
self.align=FreeEndGapFullMatch()
|
self.align=FreeEndGapFullMatch()
|
||||||
self.align.match=4
|
self.align.match=4
|
||||||
@ -76,9 +96,7 @@ class Primer:
|
|||||||
self.error=error
|
self.error=error
|
||||||
self.minscore = (self.lseq-error) * self.align.match + error * self.align.mismatch
|
self.minscore = (self.lseq-error) * self.align.match + error * self.align.mismatch
|
||||||
self.taglength=taglength
|
self.taglength=taglength
|
||||||
|
|
||||||
self.align.seqB=self.sequence
|
self.align.seqB=self.sequence
|
||||||
|
|
||||||
self.direct = direct
|
self.direct = direct
|
||||||
self.verbose=verbose
|
self.verbose=verbose
|
||||||
|
|
||||||
@ -100,9 +118,10 @@ class Primer:
|
|||||||
def __call__(self,sequence):
|
def __call__(self,sequence):
|
||||||
if len(sequence) <= self.lseq:
|
if len(sequence) <= self.lseq:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
self.align.seqA=sequence
|
self.align.seqA=sequence
|
||||||
ali=self.align()
|
ali=self.align()
|
||||||
|
|
||||||
if ali.score >= self.minscore:
|
if ali.score >= self.minscore:
|
||||||
score = ali.score
|
score = ali.score
|
||||||
start = ali[1].gaps[0][1]
|
start = ali[1].gaps[0][1]
|
||||||
@ -134,7 +153,7 @@ class Primer:
|
|||||||
__repr__=__str__
|
__repr__=__str__
|
||||||
|
|
||||||
|
|
||||||
cdef dict read_info_view(info_view, error=2, verbose=False):
|
cdef dict read_info_view(info_view, error=2, verbose=False, not_aligned=False):
|
||||||
infos = {}
|
infos = {}
|
||||||
for p in info_view:
|
for p in info_view:
|
||||||
forward=Primer(p[b'forward_primer'],
|
forward=Primer(p[b'forward_primer'],
|
||||||
@ -144,7 +163,7 @@ cdef dict read_info_view(info_view, error=2, verbose=False):
|
|||||||
|
|
||||||
fp = infos.get(forward,{})
|
fp = infos.get(forward,{})
|
||||||
infos[forward]=fp
|
infos[forward]=fp
|
||||||
|
|
||||||
reverse=Primer(p[b'reverse_primer'],
|
reverse=Primer(p[b'reverse_primer'],
|
||||||
len(p[b'reverse_tag']) if p[b'reverse_tag']!=b'-' else None,
|
len(p[b'reverse_tag']) if p[b'reverse_tag']!=b'-' else None,
|
||||||
False,
|
False,
|
||||||
@ -153,15 +172,23 @@ cdef dict read_info_view(info_view, error=2, verbose=False):
|
|||||||
rp = infos.get(reverse,{})
|
rp = infos.get(reverse,{})
|
||||||
infos[reverse]=rp
|
infos[reverse]=rp
|
||||||
|
|
||||||
cf=forward.reverse_complement()
|
if not_aligned:
|
||||||
cr=reverse.reverse_complement()
|
dpp=fp.get(reverse,{})
|
||||||
|
fp[reverse]=dpp
|
||||||
dpp=fp.get(cr,{})
|
|
||||||
fp[cr]=dpp
|
|
||||||
|
|
||||||
rpp=rp.get(cf,{})
|
|
||||||
rp[cf]=rpp
|
|
||||||
|
|
||||||
|
rpp=rp.get(forward,{})
|
||||||
|
rp[forward]=rpp
|
||||||
|
|
||||||
|
else:
|
||||||
|
cf=forward.reverse_complement()
|
||||||
|
cr=reverse.reverse_complement()
|
||||||
|
|
||||||
|
dpp=fp.get(cr,{})
|
||||||
|
fp[cr]=dpp
|
||||||
|
|
||||||
|
rpp=rp.get(cf,{})
|
||||||
|
rp[cf]=rpp
|
||||||
|
|
||||||
tags = (p[b'forward_tag'] if p[b'forward_tag']!=b'-' else None,
|
tags = (p[b'forward_tag'] if p[b'forward_tag']!=b'-' else None,
|
||||||
p[b'reverse_tag'] if p[b'reverse_tag']!=b'-' else None)
|
p[b'reverse_tag'] if p[b'reverse_tag']!=b'-' else None)
|
||||||
|
|
||||||
@ -181,7 +208,7 @@ cdef dict read_info_view(info_view, error=2, verbose=False):
|
|||||||
return infos
|
return infos
|
||||||
|
|
||||||
|
|
||||||
cdef tuple annotate(sequence, infos, verbose=False):
|
cdef tuple annotate(sequences, infos, verbose=False):
|
||||||
|
|
||||||
def sortMatch(m1, m2):
|
def sortMatch(m1, m2):
|
||||||
if m1[1] is None and m2[1] is None:
|
if m1[1] is None and m2[1] is None:
|
||||||
@ -195,59 +222,102 @@ cdef tuple annotate(sequence, infos, verbose=False):
|
|||||||
|
|
||||||
return (m1[1][1] > m2[1][2]) - (m1[1][1] < m2[1][2])
|
return (m1[1][1] > m2[1][2]) - (m1[1][1] < m2[1][2])
|
||||||
|
|
||||||
if hasattr(sequence, "quality_array"):
|
not_aligned = len(sequences) > 1
|
||||||
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality_array),0)/len(sequence.quality_array)*10
|
sequenceF = sequences[0]
|
||||||
sequence[b'avg_quality']=q
|
sequenceR = None
|
||||||
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality_array[0:10]),0)
|
if not not_aligned:
|
||||||
sequence[b'head_quality']=q
|
final_sequence = sequenceF
|
||||||
if len(sequence.quality_array[10:-10]) :
|
else:
|
||||||
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality_array[10:-10]),0)/len(sequence.quality_array[10:-10])*10
|
final_sequence = sequenceF.clone() # TODO maybe not cloning and then deleting quality tags is more efficient
|
||||||
sequence[b'mid_quality']=q
|
|
||||||
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality_array[-10:]),0)
|
if not_aligned:
|
||||||
sequence[b'tail_quality']=q
|
sequenceR = sequences[1]
|
||||||
|
final_sequence[REVERSE_SEQ_COLUMN_NAME] = sequenceR.seq
|
||||||
|
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = sequenceR.quality
|
||||||
|
|
||||||
|
for seq in sequences:
|
||||||
|
if hasattr(seq, "quality_array"):
|
||||||
|
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array),0)/len(seq.quality_array)*10
|
||||||
|
seq[b'avg_quality']=q
|
||||||
|
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[0:10]),0)
|
||||||
|
seq[b'head_quality']=q
|
||||||
|
if len(seq.quality_array[10:-10]) :
|
||||||
|
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[10:-10]),0)/len(seq.quality_array[10:-10])*10
|
||||||
|
seq[b'mid_quality']=q
|
||||||
|
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[-10:]),0)
|
||||||
|
seq[b'tail_quality']=q
|
||||||
|
|
||||||
directmatch = [(p,p(sequence)) for p in infos]
|
# Try direct matching:
|
||||||
directmatch.sort(key=cmp_to_key(sortMatch))
|
directmatch = None
|
||||||
directmatch=directmatch[0] if directmatch[0][1] is not None else None
|
first_matched_seq = None
|
||||||
|
second_matched_seq = None
|
||||||
|
for seq in sequences:
|
||||||
|
for p in infos:
|
||||||
|
directmatch = (p, p(seq))
|
||||||
|
if directmatch[1] is not None:
|
||||||
|
break
|
||||||
|
if directmatch[1] is not None:
|
||||||
|
first_matched_seq = seq
|
||||||
|
if id(first_matched_seq) == id(sequenceF) and not_aligned:
|
||||||
|
second_matched_seq = sequenceR
|
||||||
|
else:
|
||||||
|
second_matched_seq = sequenceF
|
||||||
|
break
|
||||||
|
if directmatch[1] is None:
|
||||||
|
directmatch = None
|
||||||
|
|
||||||
if directmatch is None:
|
if directmatch is None:
|
||||||
sequence[b'error']=b'No primer match'
|
final_sequence[b'error']=b'No primer match'
|
||||||
return False,sequence
|
return False, final_sequence
|
||||||
|
|
||||||
|
match = first_matched_seq[directmatch[1][1]:directmatch[1][2]]
|
||||||
|
|
||||||
match = sequence[directmatch[1][1]:directmatch[1][2]]
|
if not not_aligned:
|
||||||
|
final_sequence[b'seq_length_ori']=len(final_sequence)
|
||||||
|
|
||||||
sequence[b'seq_length_ori']=len(sequence)
|
if not not_aligned or id(first_matched_seq) == id(sequenceF):
|
||||||
|
final_sequence = final_sequence[directmatch[1][2]:]
|
||||||
sequence = sequence[directmatch[1][2]:]
|
else:
|
||||||
|
cut_seq = sequenceR[directmatch[1][2]:]
|
||||||
|
final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq
|
||||||
|
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality
|
||||||
|
|
||||||
if directmatch[0].direct:
|
if directmatch[0].direct:
|
||||||
sequence[b'direction']=b'forward'
|
final_sequence[b'direction']=b'forward'
|
||||||
sequence[b'forward_score']=directmatch[1][0]
|
final_sequence[b'forward_score']=directmatch[1][0]
|
||||||
sequence[b'forward_primer']=directmatch[0].raw
|
final_sequence[b'forward_primer']=directmatch[0].raw
|
||||||
sequence[b'forward_match']=match.seq
|
final_sequence[b'forward_match']=match.seq
|
||||||
|
|
||||||
else:
|
else:
|
||||||
sequence[b'direction']=b'reverse'
|
final_sequence[b'direction']=b'reverse'
|
||||||
sequence[b'reverse_score']=directmatch[1][0]
|
final_sequence[b'reverse_score']=directmatch[1][0]
|
||||||
sequence[b'reverse_primer']=directmatch[0].raw
|
final_sequence[b'reverse_primer']=directmatch[0].raw
|
||||||
sequence[b'reverse_match']=match.seq
|
final_sequence[b'reverse_match']=match.seq
|
||||||
|
|
||||||
infos = infos[directmatch[0]]
|
infos = infos[directmatch[0]]
|
||||||
reversematch = [(p,p(sequence)) for p in infos if p is not None]
|
|
||||||
reversematch.sort(key=cmp_to_key(sortMatch))
|
# Try reverse matching on the other sequence:
|
||||||
reversematch = reversematch[0] if reversematch[0][1] is not None else None
|
for p in infos:
|
||||||
|
reversematch = (p, p(second_matched_seq))
|
||||||
|
|
||||||
|
if reversematch[1] is None and not_aligned:
|
||||||
|
# Try matching on the same sequence than the first match
|
||||||
|
reverse_p = p.reverse_complement()
|
||||||
|
reversematch = (reverse_p, reverse_p(first_matched_seq))
|
||||||
|
|
||||||
|
if reversematch[1] is None:
|
||||||
|
reversematch = None
|
||||||
|
|
||||||
if reversematch is None and None not in infos:
|
if reversematch is None and None not in infos:
|
||||||
if directmatch[0].direct:
|
if directmatch[0].direct:
|
||||||
message = b'No reverse primer match'
|
message = b'No reverse primer match'
|
||||||
else:
|
else:
|
||||||
message = b'No direct primer match'
|
message = b'No direct primer match'
|
||||||
|
final_sequence[b'error']=message
|
||||||
sequence[b'error']=message
|
return False, final_sequence
|
||||||
return False,sequence
|
|
||||||
|
|
||||||
if reversematch is None:
|
if reversematch is None:
|
||||||
sequence[b'status']=b'partial'
|
final_sequence[b'status']=b'partial'
|
||||||
|
|
||||||
if directmatch[0].direct:
|
if directmatch[0].direct:
|
||||||
tags=(directmatch[1][3],None)
|
tags=(directmatch[1][3],None)
|
||||||
@ -255,48 +325,54 @@ cdef tuple annotate(sequence, infos, verbose=False):
|
|||||||
tags=(None,directmatch[1][3])
|
tags=(None,directmatch[1][3])
|
||||||
|
|
||||||
samples = infos[None]
|
samples = infos[None]
|
||||||
|
|
||||||
else:
|
|
||||||
sequence[b'status']=b'full'
|
|
||||||
|
|
||||||
match = sequence[reversematch[1][1]:reversematch[1][2]]
|
else:
|
||||||
|
final_sequence[b'status']=b'full'
|
||||||
|
|
||||||
|
match = second_matched_seq[reversematch[1][1]:reversematch[1][2]]
|
||||||
match = match.reverse_complement
|
match = match.reverse_complement
|
||||||
sequence = sequence[0:reversematch[1][1]]
|
|
||||||
|
if not not_aligned or id(second_matched_seq) == id(sequenceF):
|
||||||
|
final_sequence = final_sequence[0:reversematch[1][1]]
|
||||||
|
else:
|
||||||
|
cut_seq = sequenceR[reversematch[1][2]:]
|
||||||
|
final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq
|
||||||
|
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality
|
||||||
|
|
||||||
if directmatch[0].direct:
|
if directmatch[0].direct:
|
||||||
tags=(directmatch[1][3],reversematch[1][3])
|
tags=(directmatch[1][3], reversematch[1][3])
|
||||||
sequence[b'reverse_score']=reversematch[1][0]
|
final_sequence[b'reverse_score'] = reversematch[1][0]
|
||||||
sequence[b'reverse_primer']=reversematch[0].raw
|
final_sequence[b'reverse_primer'] = reversematch[0].raw
|
||||||
sequence[b'reverse_match']=match.seq
|
final_sequence[b'reverse_match'] = match.seq
|
||||||
|
|
||||||
else:
|
else:
|
||||||
tags=(reversematch[1][3],directmatch[1][3])
|
tags=(reversematch[1][3], directmatch[1][3])
|
||||||
sequence[b'forward_score']=reversematch[1][0]
|
final_sequence[b'forward_score'] = reversematch[1][0]
|
||||||
sequence[b'forward_primer']=reversematch[0].raw
|
final_sequence[b'forward_primer'] = reversematch[0].raw
|
||||||
sequence[b'forward_match']=match.seq
|
final_sequence[b'forward_match'] = match.seq
|
||||||
|
|
||||||
if tags[0] is not None:
|
if tags[0] is not None:
|
||||||
sequence[b'forward_tag'] = tags[0]
|
final_sequence[b'forward_tag'] = tags[0]
|
||||||
if tags[1] is not None:
|
if tags[1] is not None:
|
||||||
sequence[b'reverse_tag'] = tags[1]
|
final_sequence[b'reverse_tag'] = tags[1]
|
||||||
|
|
||||||
samples = infos[reversematch[0]]
|
samples = infos[reversematch[0]]
|
||||||
|
|
||||||
if not directmatch[0].direct:
|
if not directmatch[0].direct and not not_aligned: # don't reverse complement if not_aligned
|
||||||
sequence=sequence.reverse_complement
|
final_sequence = final_sequence.reverse_complement
|
||||||
|
|
||||||
sample=None
|
sample=None
|
||||||
|
|
||||||
if tags[0] is not None: # Direct tag known
|
if tags[0] is not None: # Direct tag known
|
||||||
if tags[1] is not None: # Reverse tag known
|
if tags[1] is not None: # Reverse tag known
|
||||||
sample = samples.get(tags, None)
|
sample = samples.get(tags, None)
|
||||||
else: # Reverse tag known
|
else: # Reverse tag known
|
||||||
s=[samples[x] for x in samples if x[0]==tags[0]]
|
s=[samples[x] for x in samples if x[0]==tags[0]]
|
||||||
if len(s)==1:
|
if len(s)==1:
|
||||||
sample=s[0]
|
sample=s[0]
|
||||||
elif len(s)>1:
|
elif len(s)>1:
|
||||||
sequence[b'error']=b'multiple samples match tags'
|
final_sequence[b'error']=b'multiple samples match tags'
|
||||||
return False,sequence
|
return False, final_sequence
|
||||||
else:
|
else:
|
||||||
sample=None
|
sample=None
|
||||||
else:
|
else:
|
||||||
@ -305,19 +381,21 @@ cdef tuple annotate(sequence, infos, verbose=False):
|
|||||||
if len(s)==1:
|
if len(s)==1:
|
||||||
sample=s[0]
|
sample=s[0]
|
||||||
elif len(s)>1:
|
elif len(s)>1:
|
||||||
sequence[b'error']=b'multiple samples match tags'
|
final_sequence[b'error']=b'multiple samples match tags'
|
||||||
return False,sequence
|
return False, final_sequence
|
||||||
else: # Reverse tag known
|
else: # Reverse tag known
|
||||||
sample=None
|
sample=None
|
||||||
|
|
||||||
if sample is None:
|
if sample is None:
|
||||||
sequence[b'error']=b"Cannot assign sequence to a sample"
|
final_sequence[b'error']=b"Cannot assign sequence to a sample"
|
||||||
return False,sequence
|
return False, final_sequence
|
||||||
|
|
||||||
sequence.update(sample)
|
final_sequence.update(sample)
|
||||||
sequence[b'seq_length']=len(sequence)
|
|
||||||
|
if not not_aligned:
|
||||||
|
final_sequence[b'seq_length']=len(final_sequence)
|
||||||
|
|
||||||
return True, sequence
|
return True, final_sequence
|
||||||
|
|
||||||
|
|
||||||
def run(config):
|
def run(config):
|
||||||
@ -329,11 +407,43 @@ def run(config):
|
|||||||
assert config['ngsfilter']['info_view'] is not None, "Option -t must be specified"
|
assert config['ngsfilter']['info_view'] is not None, "Option -t must be specified"
|
||||||
|
|
||||||
# Open the input
|
# Open the input
|
||||||
|
|
||||||
|
forward = None
|
||||||
|
reverse = None
|
||||||
|
input = None
|
||||||
|
not_aligned = False
|
||||||
|
|
||||||
input = open_uri(config['obi']['inputURI'])
|
input = open_uri(config['obi']['inputURI'])
|
||||||
if input is None:
|
if input is None:
|
||||||
raise Exception("Could not read input view")
|
raise Exception("Could not open input reads")
|
||||||
if input[2] != View_NUC_SEQS:
|
if input[2] != View_NUC_SEQS:
|
||||||
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
||||||
|
|
||||||
|
if "reverse" in config["ngsfilter"]:
|
||||||
|
|
||||||
|
forward = input[1]
|
||||||
|
|
||||||
|
rinput = open_uri(config["ngsfilter"]["reverse"])
|
||||||
|
if rinput is None:
|
||||||
|
raise Exception("Could not open reverse reads")
|
||||||
|
if rinput[2] != View_NUC_SEQS:
|
||||||
|
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
||||||
|
|
||||||
|
reverse = rinput[1]
|
||||||
|
|
||||||
|
if len(forward) != len(reverse):
|
||||||
|
raise Exception("Error: the number of forward and reverse reads are different")
|
||||||
|
|
||||||
|
entries = [forward, reverse]
|
||||||
|
not_aligned = True
|
||||||
|
|
||||||
|
else:
|
||||||
|
entries = input[1]
|
||||||
|
|
||||||
|
if not_aligned:
|
||||||
|
entries_len = len(forward)
|
||||||
|
else:
|
||||||
|
entries_len = len(entries)
|
||||||
|
|
||||||
# Open the output
|
# Open the output
|
||||||
output = open_uri(config['obi']['outputURI'],
|
output = open_uri(config['obi']['outputURI'],
|
||||||
@ -343,10 +453,9 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
|
|
||||||
entries = input[1]
|
|
||||||
o_view = output[1]
|
o_view = output[1]
|
||||||
|
|
||||||
# Open the the view containing the informations about the tags and the primers
|
# Open the view containing the informations about the tags and the primers
|
||||||
info_input = open_uri(config['ngsfilter']['info_view'])
|
info_input = open_uri(config['ngsfilter']['info_view'])
|
||||||
if info_input is None:
|
if info_input is None:
|
||||||
raise Exception("Could not read the view containing the informations about the tags and the primers")
|
raise Exception("Could not read the view containing the informations about the tags and the primers")
|
||||||
@ -364,19 +473,27 @@ def run(config):
|
|||||||
unidentified = None
|
unidentified = None
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(entries), config, seconde=5)
|
pb = ProgressBar(entries_len, config, seconde=5)
|
||||||
|
|
||||||
# Check and store primers and tags
|
# Check and store primers and tags
|
||||||
infos = read_info_view(info_view, error=config['ngsfilter']['error'])
|
infos = read_info_view(info_view, error=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned) # TODO obi verbose option
|
||||||
|
|
||||||
|
if not_aligned:
|
||||||
|
Column.new_column(o_view, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ)
|
||||||
|
Column.new_column(o_view, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=o_view[REVERSE_SEQ_COLUMN_NAME].version)
|
||||||
|
|
||||||
|
Column.new_column(unidentified, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ)
|
||||||
|
Column.new_column(unidentified, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=unidentified[REVERSE_SEQ_COLUMN_NAME].version)
|
||||||
|
|
||||||
g = 0
|
g = 0
|
||||||
u = 0
|
u = 0
|
||||||
i = 0
|
|
||||||
try:
|
try:
|
||||||
for iseq in entries:
|
for i in range(entries_len):
|
||||||
pb(i)
|
pb(i)
|
||||||
i+=1
|
if not_aligned:
|
||||||
modseq = Nuc_Seq.new_from_stored(iseq)
|
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
|
||||||
|
else:
|
||||||
|
modseq = [Nuc_Seq.new_from_stored(entries[i])]
|
||||||
good, oseq = annotate(modseq, infos)
|
good, oseq = annotate(modseq, infos)
|
||||||
if good:
|
if good:
|
||||||
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||||
@ -386,7 +503,7 @@ def run(config):
|
|||||||
u+=1
|
u+=1
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
|
raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
|
||||||
|
|
||||||
print("\n")
|
print("\n")
|
||||||
print(repr(o_view))
|
print(repr(o_view))
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user