Add a patch to skip sequences that have no taxonomic data

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@9 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2007-03-14 16:13:23 +00:00
parent 78205cb1a3
commit 9726434b46

View File

@ -249,7 +249,10 @@ def genbankParser(entry):
Id = _gbParseID.findall(entry)[0] Id = _gbParseID.findall(entry)[0]
De = ' '.join(_gbParseDE.findall(entry)[0].split()) De = ' '.join(_gbParseDE.findall(entry)[0].split())
Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper()) Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper())
try:
Tx = int(_gbParseTX.findall(entry)[0]) Tx = int(_gbParseTX.findall(entry)[0])
except IndexError:
Tx = None
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq} return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
def sequenceIterator(file,parser): def sequenceIterator(file,parser):
@ -354,6 +357,7 @@ def ecoSeqWriter(file,input,taxindex,parser=genbankParser):
progressBar(1, inputsize,reset=True) progressBar(1, inputsize,reset=True)
for entry in entries: for entry in entries:
if entry['taxid'] is not None:
entry['taxid']=taxindex[entry['taxid']] entry['taxid']=taxindex[entry['taxid']]
if entry['taxid'] is not None: if entry['taxid'] is not None:
seqcount+=1 seqcount+=1
@ -362,7 +366,9 @@ def ecoSeqWriter(file,input,taxindex,parser=genbankParser):
skipped.append[entry['id']] skipped.append[entry['id']]
where = universalTell(input) where = universalTell(input)
progressBar(where, inputsize) progressBar(where, inputsize)
print >>sys.stderr," Read sequences : %d " % seqcount, print >>sys.stderr," Readed sequences : %d " % seqcount,
else:
skipped.append[entry['id']]
print >>sys.stderr print >>sys.stderr
output.seek(0,0) output.seek(0,0)