New command: obi ngsfilter
This commit is contained in:
11
python/obitools3/align/_freeendgap.pxd
Executable file
11
python/obitools3/align/_freeendgap.pxd
Executable file
@ -0,0 +1,11 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from ._nws cimport *
|
||||||
|
|
||||||
|
cdef class FreeEndGap(NWS):
|
||||||
|
cdef double xsmax
|
||||||
|
cdef int xmax
|
||||||
|
|
||||||
|
|
||||||
|
cdef double doAlignment(self) except? 0
|
||||||
|
|
160
python/obitools3/align/_freeendgap.pyx
Executable file
160
python/obitools3/align/_freeendgap.pyx
Executable file
@ -0,0 +1,160 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
'''
|
||||||
|
Created on 6 Nov. 2009
|
||||||
|
|
||||||
|
@author: coissac
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
cdef class FreeEndGap(NWS):
|
||||||
|
|
||||||
|
def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
|
||||||
|
NWS.__init__(self,match,mismatch,opengap,extgap)
|
||||||
|
self.xsmax=0
|
||||||
|
self.xmax=0
|
||||||
|
|
||||||
|
cdef double doAlignment(self) except? 0:
|
||||||
|
cdef int i # vertical index
|
||||||
|
cdef int j # horizontal index
|
||||||
|
cdef int idx
|
||||||
|
cdef int idx0
|
||||||
|
cdef int idx1
|
||||||
|
cdef int jump
|
||||||
|
cdef int delta
|
||||||
|
cdef double score
|
||||||
|
cdef double scoremax
|
||||||
|
cdef int path
|
||||||
|
|
||||||
|
assert self.hSeq.length > self.vSeq.length, \
|
||||||
|
"Sequence B must be shorter than sequence A"
|
||||||
|
|
||||||
|
|
||||||
|
if self.needToCompute:
|
||||||
|
self.allocate()
|
||||||
|
self.reset()
|
||||||
|
self.xsmax=0
|
||||||
|
self.xmax=0
|
||||||
|
|
||||||
|
for j in range(1,self.hSeq.length+1):
|
||||||
|
idx = self.index(j,0)
|
||||||
|
self.matrix.matrix[idx].score = 0
|
||||||
|
self.matrix.matrix[idx].path = j
|
||||||
|
|
||||||
|
for i in range(1,self.vSeq.length+1):
|
||||||
|
idx = self.index(0,i)
|
||||||
|
self.matrix.matrix[idx].score = self._opengap + (self._extgap * (i-1))
|
||||||
|
self.matrix.matrix[idx].path = -i
|
||||||
|
|
||||||
|
idx0=self.index(-1,0)
|
||||||
|
idx1=self.index(0,1)
|
||||||
|
for i in range(1,self.vSeq.length+1):
|
||||||
|
idx0+=1
|
||||||
|
idx1+=1
|
||||||
|
for j in range(1,self.hSeq.length+1):
|
||||||
|
|
||||||
|
# 1 - came from diagonal
|
||||||
|
#idx = self.index(j-1,i-1)
|
||||||
|
idx = idx0
|
||||||
|
# print "computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),
|
||||||
|
scoremax = self.matrix.matrix[idx].score + \
|
||||||
|
self.matchScore(j,i)
|
||||||
|
path = 0
|
||||||
|
|
||||||
|
# print "so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),
|
||||||
|
|
||||||
|
# 2 - open horizontal gap
|
||||||
|
# idx = self.index(j-1,i)
|
||||||
|
idx = idx1 - 1
|
||||||
|
score = self.matrix.matrix[idx].score+ \
|
||||||
|
self._opengap
|
||||||
|
if score > scoremax :
|
||||||
|
scoremax = score
|
||||||
|
path = +1
|
||||||
|
|
||||||
|
# 3 - open vertical gap
|
||||||
|
# idx = self.index(j,i-1)
|
||||||
|
idx = idx0 + 1
|
||||||
|
score = self.matrix.matrix[idx].score + \
|
||||||
|
self._opengap
|
||||||
|
if score > scoremax :
|
||||||
|
scoremax = score
|
||||||
|
path = -1
|
||||||
|
|
||||||
|
# 4 - extend horizontal gap
|
||||||
|
jump = self.matrix.bestHJump[i]
|
||||||
|
if jump >= 0:
|
||||||
|
idx = self.index(jump,i)
|
||||||
|
delta = j-jump
|
||||||
|
score = self.matrix.matrix[idx].score + \
|
||||||
|
self._extgap * delta
|
||||||
|
if score > scoremax :
|
||||||
|
scoremax = score
|
||||||
|
path = delta+1
|
||||||
|
|
||||||
|
# 5 - extend vertical gap
|
||||||
|
jump = self.matrix.bestVJump[j]
|
||||||
|
if jump >= 0:
|
||||||
|
idx = self.index(j,jump)
|
||||||
|
delta = i-jump
|
||||||
|
score = self.matrix.matrix[idx].score + \
|
||||||
|
self._extgap * delta
|
||||||
|
if score > scoremax :
|
||||||
|
scoremax = score
|
||||||
|
path = -delta-1
|
||||||
|
|
||||||
|
# idx = self.index(j,i)
|
||||||
|
idx = idx1
|
||||||
|
self.matrix.matrix[idx].score = scoremax
|
||||||
|
self.matrix.matrix[idx].path = path
|
||||||
|
|
||||||
|
if path == -1:
|
||||||
|
self.matrix.bestVJump[j]=i
|
||||||
|
elif path == +1 :
|
||||||
|
self.matrix.bestHJump[i]=j
|
||||||
|
|
||||||
|
if i==self.vSeq.length and scoremax > self.xsmax:
|
||||||
|
self.xsmax=scoremax
|
||||||
|
self.xmax=j
|
||||||
|
idx0+=1
|
||||||
|
idx1+=1
|
||||||
|
|
||||||
|
self.sequenceChanged=False
|
||||||
|
self.scoreChanged=False
|
||||||
|
|
||||||
|
return self.xsmax
|
||||||
|
|
||||||
|
cdef void backtrack(self):
|
||||||
|
#cdef list path=[]
|
||||||
|
cdef int i
|
||||||
|
cdef int j
|
||||||
|
cdef int p
|
||||||
|
|
||||||
|
self.doAlignment()
|
||||||
|
j=self.xmax
|
||||||
|
i=self.vSeq.length
|
||||||
|
self.path=allocatePath(i,j+1,self.path)
|
||||||
|
|
||||||
|
if self.xmax<self.hSeq.length:
|
||||||
|
self.path.path[self.path.length]=self.hSeq.length-self.xmax
|
||||||
|
self.path.length+=1
|
||||||
|
|
||||||
|
while (i or j):
|
||||||
|
p=self.matrix.matrix[self.index(j,i)].path
|
||||||
|
self.path.path[self.path.length]=p
|
||||||
|
self.path.length+=1
|
||||||
|
#path.append(p)
|
||||||
|
if p==0:
|
||||||
|
i-=1
|
||||||
|
j-=1
|
||||||
|
elif p < 0:
|
||||||
|
i+=p
|
||||||
|
else:
|
||||||
|
j-=p
|
||||||
|
|
||||||
|
#path.reverse()
|
||||||
|
#reversePath(self.path)
|
||||||
|
self.path.hStart=0
|
||||||
|
self.path.vStart=0
|
||||||
|
#return 0,0,path
|
||||||
|
|
7
python/obitools3/align/_freeendgapfm.pxd
Executable file
7
python/obitools3/align/_freeendgapfm.pxd
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from ._freeendgap cimport *
|
||||||
|
|
||||||
|
cdef class FreeEndGapFullMatch(FreeEndGap):
|
||||||
|
cdef double matchScore(self,int h, int v)
|
||||||
|
|
18
python/obitools3/align/_freeendgapfm.pyx
Executable file
18
python/obitools3/align/_freeendgapfm.pyx
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
'''
|
||||||
|
Created on 6 Nov. 2009
|
||||||
|
|
||||||
|
@author: coissac
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
cdef class FreeEndGapFullMatch(FreeEndGap):
|
||||||
|
|
||||||
|
cdef double matchScore(self,int h, int v):
|
||||||
|
cdef double score
|
||||||
|
if iupacMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1]):
|
||||||
|
score=self._match
|
||||||
|
else:
|
||||||
|
score=self._mismatch
|
||||||
|
return score
|
@ -6,8 +6,6 @@ Created on 6 Nov. 2009
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from ._rassemble cimport *
|
|
||||||
|
|
||||||
|
|
||||||
cdef class RightDirectAssemble(NWS):
|
cdef class RightDirectAssemble(NWS):
|
||||||
|
|
||||||
|
410
python/obitools3/commands/ngsfilter.pyx
Normal file
410
python/obitools3/commands/ngsfilter.pyx
Normal file
@ -0,0 +1,410 @@
|
|||||||
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.dms import DMS
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
from obitools3.dms.column.column cimport Column, Column_line
|
||||||
|
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
|
||||||
|
from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t, OBI_QUAL
|
||||||
|
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
from obitools3.apps.config import logger
|
||||||
|
from obitools3.align._freeendgapfm import FreeEndGapFullMatch
|
||||||
|
from obitools3.dms.obiseq cimport Nuc_Seq, Nuc_Seq_Stored
|
||||||
|
from functools import reduce, cmp_to_key
|
||||||
|
import math
|
||||||
|
from curses.ascii import SO
|
||||||
|
|
||||||
|
|
||||||
|
__title__="Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers"
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
|
||||||
|
|
||||||
|
addSequenceInputOption(parser)
|
||||||
|
addMinimalOutputOption(parser)
|
||||||
|
|
||||||
|
group = parser.add_argument_group('obi ngsfilter specific options')
|
||||||
|
|
||||||
|
group.add_argument('-t','--info-view',
|
||||||
|
action="store", dest="ngsfilter:info_view",
|
||||||
|
metavar="<URI>",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help="URI to the view containing the samples definition (with tags, primers, sample names,...)")
|
||||||
|
|
||||||
|
group.add_argument('-u','--unidentified',
|
||||||
|
action="store", dest="ngsfilter:unidentified",
|
||||||
|
metavar="<URI>",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help="URI to the view used to store the sequences unassigned to any sample")
|
||||||
|
|
||||||
|
group.add_argument('-e','--error',
|
||||||
|
action="store", dest="ngsfilter:error",
|
||||||
|
metavar="###",
|
||||||
|
type=int,
|
||||||
|
default=2,
|
||||||
|
help="Number of errors allowed for matching primers [default = 2]")
|
||||||
|
|
||||||
|
|
||||||
|
class Primer:
|
||||||
|
|
||||||
|
collection={}
|
||||||
|
|
||||||
|
def __init__(self, sequence, taglength, direct=True, error=2, verbose=False):
|
||||||
|
'''
|
||||||
|
|
||||||
|
@param sequence:
|
||||||
|
@type sequence:
|
||||||
|
@param direct:
|
||||||
|
@type direct:
|
||||||
|
'''
|
||||||
|
|
||||||
|
assert sequence not in Primer.collection \
|
||||||
|
or Primer.collection[sequence]==taglength, \
|
||||||
|
"Primer %s must always be used with tags of the same length" % sequence
|
||||||
|
|
||||||
|
Primer.collection[sequence]=taglength
|
||||||
|
|
||||||
|
self.raw=sequence
|
||||||
|
self.sequence = Nuc_Seq(b"primer", sequence)
|
||||||
|
self.lseq = len(self.sequence)
|
||||||
|
self.align=FreeEndGapFullMatch()
|
||||||
|
self.align.match=4
|
||||||
|
self.align.mismatch=-2
|
||||||
|
self.align.opengap=-2
|
||||||
|
self.align.extgap=-2
|
||||||
|
self.error=error
|
||||||
|
self.minscore = (self.lseq-error) * self.align.match + error * self.align.mismatch
|
||||||
|
self.taglength=taglength
|
||||||
|
|
||||||
|
self.align.seqB=self.sequence
|
||||||
|
|
||||||
|
self.direct = direct
|
||||||
|
self.verbose=verbose
|
||||||
|
|
||||||
|
def reverse_complement(self):
|
||||||
|
p = Primer(self.raw,
|
||||||
|
self.taglength,
|
||||||
|
not self.direct,verbose=self.verbose,
|
||||||
|
error=self.error)
|
||||||
|
p.sequence=p.sequence.reverse_complement
|
||||||
|
p.align.seqB=p.sequence
|
||||||
|
return p
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash(str(self.raw))
|
||||||
|
|
||||||
|
def __eq__(self,primer):
|
||||||
|
return self.raw==primer.raw
|
||||||
|
|
||||||
|
def __call__(self,sequence):
|
||||||
|
if len(sequence) <= self.lseq:
|
||||||
|
return None
|
||||||
|
self.align.seqA=sequence
|
||||||
|
ali=self.align()
|
||||||
|
|
||||||
|
if ali.score >= self.minscore:
|
||||||
|
score = ali.score
|
||||||
|
start = ali[1].gaps[0][1]
|
||||||
|
end = len(ali[1])-ali[1].gaps[-1][1]
|
||||||
|
if self.taglength is not None:
|
||||||
|
if self.sequence.revcomp:
|
||||||
|
if (len(sequence)-end) >= self.taglength:
|
||||||
|
tag = sequence.clone()
|
||||||
|
tag = tag[end:end+self.taglength]
|
||||||
|
tag = tag.reverse_complement.seq
|
||||||
|
else:
|
||||||
|
tag=None
|
||||||
|
else:
|
||||||
|
if start >= self.taglength:
|
||||||
|
tag = sequence.clone()
|
||||||
|
tag = tag[start - self.taglength:start].seq
|
||||||
|
else:
|
||||||
|
tag=None
|
||||||
|
else:
|
||||||
|
tag=None
|
||||||
|
|
||||||
|
return score,start,end,tag
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "%s: %s" % ({True:'D',False:'R'}[self.direct],self.raw)
|
||||||
|
|
||||||
|
__repr__=__str__
|
||||||
|
|
||||||
|
|
||||||
|
cdef dict read_info_view(info_view, error=2, verbose=False):
|
||||||
|
infos = {}
|
||||||
|
for p in info_view:
|
||||||
|
forward=Primer(p[b'forward_primer'],
|
||||||
|
len(p[b'forward_tag']) if p[b'forward_tag']!=b'-' else None,
|
||||||
|
True,
|
||||||
|
error=error,verbose=verbose)
|
||||||
|
|
||||||
|
fp = infos.get(forward,{})
|
||||||
|
infos[forward]=fp
|
||||||
|
|
||||||
|
reverse=Primer(p[b'reverse_primer'],
|
||||||
|
len(p[b'reverse_tag']) if p[b'reverse_tag']!=b'-' else None,
|
||||||
|
False,
|
||||||
|
error=error,verbose=verbose)
|
||||||
|
|
||||||
|
rp = infos.get(reverse,{})
|
||||||
|
infos[reverse]=rp
|
||||||
|
|
||||||
|
cf=forward.reverse_complement()
|
||||||
|
cr=reverse.reverse_complement()
|
||||||
|
|
||||||
|
dpp=fp.get(cr,{})
|
||||||
|
fp[cr]=dpp
|
||||||
|
|
||||||
|
rpp=rp.get(cf,{})
|
||||||
|
rp[cf]=rpp
|
||||||
|
|
||||||
|
tags = (p[b'forward_tag'] if p[b'forward_tag']!=b'-' else None,
|
||||||
|
p[b'reverse_tag'] if p[b'reverse_tag']!=b'-' else None)
|
||||||
|
|
||||||
|
assert tags not in dpp, \
|
||||||
|
"Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse)
|
||||||
|
|
||||||
|
# Save additional data
|
||||||
|
special_keys = [b'forward_primer', b'reverse_primer', b'forward_tag', b'reverse_tag']
|
||||||
|
data={}
|
||||||
|
for key in p:
|
||||||
|
if key not in special_keys:
|
||||||
|
data[key] = p[key]
|
||||||
|
|
||||||
|
dpp[tags] = data
|
||||||
|
rpp[tags] = data
|
||||||
|
|
||||||
|
return infos
|
||||||
|
|
||||||
|
|
||||||
|
cdef tuple annotate(sequence, infos, verbose=False):
|
||||||
|
|
||||||
|
def sortMatch(m1, m2):
|
||||||
|
if m1[1] is None and m2[1] is None:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if m1[1] is None:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if m2[1] is None:
|
||||||
|
return -1
|
||||||
|
|
||||||
|
return (m1[1][1] > m2[1][2]) - (m1[1][1] < m2[1][2])
|
||||||
|
|
||||||
|
if hasattr(sequence, "quality_array"):
|
||||||
|
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality_array),0)/len(sequence.quality_array)*10
|
||||||
|
sequence[b'avg_quality']=q
|
||||||
|
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality_array[0:10]),0)
|
||||||
|
sequence[b'head_quality']=q
|
||||||
|
if len(sequence.quality_array[10:-10]) :
|
||||||
|
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality_array[10:-10]),0)/len(sequence.quality_array[10:-10])*10
|
||||||
|
sequence[b'mid_quality']=q
|
||||||
|
q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality_array[-10:]),0)
|
||||||
|
sequence[b'tail_quality']=q
|
||||||
|
|
||||||
|
directmatch = [(p,p(sequence)) for p in infos]
|
||||||
|
directmatch.sort(key=cmp_to_key(sortMatch))
|
||||||
|
directmatch=directmatch[0] if directmatch[0][1] is not None else None
|
||||||
|
|
||||||
|
if directmatch is None:
|
||||||
|
sequence[b'error']=b'No primer match'
|
||||||
|
return False,sequence
|
||||||
|
|
||||||
|
match = sequence[directmatch[1][1]:directmatch[1][2]]
|
||||||
|
|
||||||
|
sequence[b'seq_length_ori']=len(sequence)
|
||||||
|
|
||||||
|
sequence = sequence[directmatch[1][2]:]
|
||||||
|
|
||||||
|
if directmatch[0].direct:
|
||||||
|
sequence[b'direction']=b'forward'
|
||||||
|
sequence[b'forward_score']=directmatch[1][0]
|
||||||
|
sequence[b'forward_primer']=directmatch[0].raw
|
||||||
|
sequence[b'forward_match']=match.seq
|
||||||
|
|
||||||
|
else:
|
||||||
|
sequence[b'direction']=b'reverse'
|
||||||
|
sequence[b'reverse_score']=directmatch[1][0]
|
||||||
|
sequence[b'reverse_primer']=directmatch[0].raw
|
||||||
|
sequence[b'reverse_match']=match.seq
|
||||||
|
|
||||||
|
infos = infos[directmatch[0]]
|
||||||
|
reversematch = [(p,p(sequence)) for p in infos if p is not None]
|
||||||
|
reversematch.sort(key=cmp_to_key(sortMatch))
|
||||||
|
reversematch = reversematch[0] if reversematch[0][1] is not None else None
|
||||||
|
|
||||||
|
if reversematch is None and None not in infos:
|
||||||
|
if directmatch[0].direct:
|
||||||
|
message = b'No reverse primer match'
|
||||||
|
else:
|
||||||
|
message = b'No direct primer match'
|
||||||
|
|
||||||
|
sequence[b'error']=message
|
||||||
|
return False,sequence
|
||||||
|
|
||||||
|
if reversematch is None:
|
||||||
|
sequence[b'status']=b'partial'
|
||||||
|
|
||||||
|
if directmatch[0].direct:
|
||||||
|
tags=(directmatch[1][3],None)
|
||||||
|
else:
|
||||||
|
tags=(None,directmatch[1][3])
|
||||||
|
|
||||||
|
samples = infos[None]
|
||||||
|
|
||||||
|
else:
|
||||||
|
sequence[b'status']=b'full'
|
||||||
|
|
||||||
|
match = sequence[reversematch[1][1]:reversematch[1][2]]
|
||||||
|
match = match.reverse_complement
|
||||||
|
sequence = sequence[0:reversematch[1][1]]
|
||||||
|
|
||||||
|
if directmatch[0].direct:
|
||||||
|
tags=(directmatch[1][3],reversematch[1][3])
|
||||||
|
sequence[b'reverse_score']=reversematch[1][0]
|
||||||
|
sequence[b'reverse_primer']=reversematch[0].raw
|
||||||
|
sequence[b'reverse_match']=match.seq
|
||||||
|
|
||||||
|
else:
|
||||||
|
tags=(reversematch[1][3],directmatch[1][3])
|
||||||
|
sequence[b'forward_score']=reversematch[1][0]
|
||||||
|
sequence[b'forward_primer']=reversematch[0].raw
|
||||||
|
sequence[b'forward_match']=match.seq
|
||||||
|
|
||||||
|
if tags[0] is not None:
|
||||||
|
sequence[b'forward_tag'] = tags[0]
|
||||||
|
if tags[1] is not None:
|
||||||
|
sequence[b'reverse_tag'] = tags[1]
|
||||||
|
|
||||||
|
samples = infos[reversematch[0]]
|
||||||
|
|
||||||
|
if not directmatch[0].direct:
|
||||||
|
sequence=sequence.reverse_complement
|
||||||
|
|
||||||
|
sample=None
|
||||||
|
|
||||||
|
if tags[0] is not None: # Direct tag known
|
||||||
|
if tags[1] is not None: # Reverse tag known
|
||||||
|
sample = samples.get(tags, None)
|
||||||
|
else: # Reverse tag known
|
||||||
|
s=[samples[x] for x in samples if x[0]==tags[0]]
|
||||||
|
if len(s)==1:
|
||||||
|
sample=s[0]
|
||||||
|
elif len(s)>1:
|
||||||
|
sequence[b'error']=b'multiple samples match tags'
|
||||||
|
return False,sequence
|
||||||
|
else:
|
||||||
|
sample=None
|
||||||
|
else:
|
||||||
|
if tags[1] is not None: # Reverse tag known
|
||||||
|
s=[samples[x] for x in samples if x[1]==tags[1]]
|
||||||
|
if len(s)==1:
|
||||||
|
sample=s[0]
|
||||||
|
elif len(s)>1:
|
||||||
|
sequence[b'error']=b'multiple samples match tags'
|
||||||
|
return False,sequence
|
||||||
|
else: # Reverse tag known
|
||||||
|
sample=None
|
||||||
|
|
||||||
|
if sample is None:
|
||||||
|
sequence[b'error']=b"Cannot assign sequence to a sample"
|
||||||
|
return False,sequence
|
||||||
|
|
||||||
|
sequence.update(sample)
|
||||||
|
sequence[b'seq_length']=len(sequence)
|
||||||
|
|
||||||
|
return True, sequence
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
|
||||||
|
|
||||||
|
DMS.obi_atexit()
|
||||||
|
|
||||||
|
logger("info", "obi ngsfilter")
|
||||||
|
|
||||||
|
assert config['ngsfilter']['info_view'] is not None, "Option -t must be specified"
|
||||||
|
|
||||||
|
# Open the input
|
||||||
|
input = open_uri(config['obi']['inputURI'])
|
||||||
|
if input is None:
|
||||||
|
raise Exception("Could not read input view")
|
||||||
|
if input[2] != View_NUC_SEQS:
|
||||||
|
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
||||||
|
|
||||||
|
# Open the output
|
||||||
|
output = open_uri(config['obi']['outputURI'],
|
||||||
|
input=False,
|
||||||
|
newviewtype=View_NUC_SEQS)
|
||||||
|
|
||||||
|
if output is None:
|
||||||
|
raise Exception("Could not create output view")
|
||||||
|
|
||||||
|
entries = input[1]
|
||||||
|
o_view = output[1]
|
||||||
|
|
||||||
|
# Open the the view containing the informations about the tags and the primers
|
||||||
|
info_input = open_uri(config['ngsfilter']['info_view'])
|
||||||
|
if info_input is None:
|
||||||
|
raise Exception("Could not read the view containing the informations about the tags and the primers")
|
||||||
|
info_view = info_input[1]
|
||||||
|
|
||||||
|
# Open the unidentified view
|
||||||
|
if config['ngsfilter']['unidentified'] is not None:
|
||||||
|
unidentified_input = open_uri(config['ngsfilter']['unidentified'],
|
||||||
|
input=False,
|
||||||
|
newviewtype=View_NUC_SEQS)
|
||||||
|
if unidentified_input is None:
|
||||||
|
raise Exception("Could not open the view containing the unidentified reads")
|
||||||
|
unidentified = unidentified_input[1]
|
||||||
|
|
||||||
|
# Initialize the progress bar
|
||||||
|
pb = ProgressBar(len(entries), config, seconde=5)
|
||||||
|
|
||||||
|
# Check and store primers and tags
|
||||||
|
infos = read_info_view(info_view, error=config['ngsfilter']['error'])
|
||||||
|
|
||||||
|
g = 0
|
||||||
|
u = 0
|
||||||
|
i = 0
|
||||||
|
try:
|
||||||
|
for iseq in entries:
|
||||||
|
pb(i)
|
||||||
|
i+=1
|
||||||
|
modseq = Nuc_Seq.new_from_stored(iseq)
|
||||||
|
good, oseq = annotate(modseq, infos)
|
||||||
|
if good:
|
||||||
|
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||||
|
g+=1
|
||||||
|
elif unidentified is not None:
|
||||||
|
unidentified[u].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||||
|
u+=1
|
||||||
|
except Exception, e:
|
||||||
|
raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
|
||||||
|
|
||||||
|
input[0].close()
|
||||||
|
output[0].close()
|
||||||
|
info_input[0].close()
|
||||||
|
unidentified_input[0].close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# TODO ??
|
||||||
|
# if options.taglist is not None:
|
||||||
|
#TODO: Patch when no taglists
|
||||||
|
# else:
|
||||||
|
# options.direct=options.direct.lower()
|
||||||
|
# options.reverse=options.reverse.lower()
|
||||||
|
# primers={options.direct:(options.taglength,{})}
|
||||||
|
# if options.reverse is not None:
|
||||||
|
# reverse = options.reverse
|
||||||
|
# else:
|
||||||
|
# reverse = '-'
|
||||||
|
# primers[options.direct][1][reverse]={'-':('-','-',True,None)}
|
||||||
|
|
Reference in New Issue
Block a user