Patch a bug for primers without tags

This commit is contained in:
2012-06-27 21:20:29 +00:00
parent 617638a4c1
commit 4892f93e96
2 changed files with 41 additions and 3 deletions

View File

@ -11,6 +11,7 @@ from obitools.options._options import allEntryIterator
from obitools.word._readindex import ReadIndex,minword
import sys
import math
def addWindowsOptions(optionManager):
@ -72,6 +73,40 @@ def addWindowsOptions(optionManager):
help="Write singleton sequence in this file"
)
def cutQuality(s):
def mean(x):
return float(reduce(lambda a,b: a+b,x,0))/len(x)
def cumsum0(x):
if x[0] < 0: x[0]=0
for i in xrange(1,len(x)):
x[i]+=x[i-1]
if x[i]<0: x[i]=0
q = [- math.log10(a) * 10 for a in s.quality]
mq=mean(q)
cumsum0([a - mq for a in q])
mx = max(q)
xmax = len(q)-1
while(q[xmax] < mx):
xmax-=1
xmin=xmax
xmax+=1
while(xmin>0 and q[xmin]>0):
xmin-=1
if q[xmin]==0:
xmin+=1
return s[xmin:xmax]
def cutDirectReverse(entries):
first = []
@ -99,7 +134,7 @@ def cutDirectReverse(entries):
def seqPairs(direct,reverse):
for d in direct:
r = reverse.next()
yield(d,r)
yield(cutQuality(d),cutQuality(r))
def seq2words(seqs,options):
@ -139,7 +174,7 @@ if __name__ == '__main__':
worddone=set()
wordlist = seq2words(reference,options)
indexer = ReadIndex()
indexer = ReadIndex(readsize=105)
seqpair=0
nbseq=0
@ -176,6 +211,9 @@ if __name__ == '__main__':
for seq in indexer.iterreads(w):
i+=1
#print formatFasta(seq)
s = str(seq)
sc = str(seq.complement())
assert w in s or w in sc,'Bug !!!! sequence %s (%d) %s sans %s' % (seq.id,i,s,w)
words = seq2words((seq,),options) - worddone
wordlist|=words

View File

@ -654,7 +654,7 @@ cdef class ReadIndex:
assert self._readsize < 1024,"You cannot use reads longer than 1023 base pair"
else:
assert len(sequence[0])==len(sequence[1]) and len(sequence[0]) <= self._readsize
assert len(sequence[0]) <= self._readsize and len(sequence[1]) <= self._readsize
if self._buffer==NULL:
self._buffer = <pobinuc>malloc(self._seqsize*self._chuncksize)