Patch a bug for primers without tags
This commit is contained in:
@ -11,6 +11,7 @@ from obitools.options._options import allEntryIterator
|
|||||||
from obitools.word._readindex import ReadIndex,minword
|
from obitools.word._readindex import ReadIndex,minword
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
import math
|
||||||
|
|
||||||
def addWindowsOptions(optionManager):
|
def addWindowsOptions(optionManager):
|
||||||
|
|
||||||
@ -72,6 +73,40 @@ def addWindowsOptions(optionManager):
|
|||||||
help="Write singleton sequence in this file"
|
help="Write singleton sequence in this file"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def cutQuality(s):
|
||||||
|
def mean(x):
|
||||||
|
return float(reduce(lambda a,b: a+b,x,0))/len(x)
|
||||||
|
|
||||||
|
def cumsum0(x):
|
||||||
|
if x[0] < 0: x[0]=0
|
||||||
|
for i in xrange(1,len(x)):
|
||||||
|
x[i]+=x[i-1]
|
||||||
|
if x[i]<0: x[i]=0
|
||||||
|
|
||||||
|
q = [- math.log10(a) * 10 for a in s.quality]
|
||||||
|
mq=mean(q)
|
||||||
|
cumsum0([a - mq for a in q])
|
||||||
|
|
||||||
|
mx = max(q)
|
||||||
|
|
||||||
|
xmax = len(q)-1
|
||||||
|
|
||||||
|
while(q[xmax] < mx):
|
||||||
|
xmax-=1
|
||||||
|
|
||||||
|
xmin=xmax
|
||||||
|
xmax+=1
|
||||||
|
|
||||||
|
while(xmin>0 and q[xmin]>0):
|
||||||
|
xmin-=1
|
||||||
|
|
||||||
|
if q[xmin]==0:
|
||||||
|
xmin+=1
|
||||||
|
|
||||||
|
return s[xmin:xmax]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def cutDirectReverse(entries):
|
def cutDirectReverse(entries):
|
||||||
first = []
|
first = []
|
||||||
|
|
||||||
@ -99,7 +134,7 @@ def cutDirectReverse(entries):
|
|||||||
def seqPairs(direct,reverse):
|
def seqPairs(direct,reverse):
|
||||||
for d in direct:
|
for d in direct:
|
||||||
r = reverse.next()
|
r = reverse.next()
|
||||||
yield(d,r)
|
yield(cutQuality(d),cutQuality(r))
|
||||||
|
|
||||||
|
|
||||||
def seq2words(seqs,options):
|
def seq2words(seqs,options):
|
||||||
@ -139,7 +174,7 @@ if __name__ == '__main__':
|
|||||||
worddone=set()
|
worddone=set()
|
||||||
wordlist = seq2words(reference,options)
|
wordlist = seq2words(reference,options)
|
||||||
|
|
||||||
indexer = ReadIndex()
|
indexer = ReadIndex(readsize=105)
|
||||||
|
|
||||||
seqpair=0
|
seqpair=0
|
||||||
nbseq=0
|
nbseq=0
|
||||||
@ -176,6 +211,9 @@ if __name__ == '__main__':
|
|||||||
for seq in indexer.iterreads(w):
|
for seq in indexer.iterreads(w):
|
||||||
i+=1
|
i+=1
|
||||||
#print formatFasta(seq)
|
#print formatFasta(seq)
|
||||||
|
s = str(seq)
|
||||||
|
sc = str(seq.complement())
|
||||||
|
assert w in s or w in sc,'Bug !!!! sequence %s (%d) %s sans %s' % (seq.id,i,s,w)
|
||||||
words = seq2words((seq,),options) - worddone
|
words = seq2words((seq,),options) - worddone
|
||||||
wordlist|=words
|
wordlist|=words
|
||||||
|
|
||||||
|
@ -654,7 +654,7 @@ cdef class ReadIndex:
|
|||||||
assert self._readsize < 1024,"You cannot use reads longer than 1023 base pair"
|
assert self._readsize < 1024,"You cannot use reads longer than 1023 base pair"
|
||||||
|
|
||||||
else:
|
else:
|
||||||
assert len(sequence[0])==len(sequence[1]) and len(sequence[0]) <= self._readsize
|
assert len(sequence[0]) <= self._readsize and len(sequence[1]) <= self._readsize
|
||||||
|
|
||||||
if self._buffer==NULL:
|
if self._buffer==NULL:
|
||||||
self._buffer = <pobinuc>malloc(self._seqsize*self._chuncksize)
|
self._buffer = <pobinuc>malloc(self._seqsize*self._chuncksize)
|
||||||
|
Reference in New Issue
Block a user