From 9726434b46d08b771655f209fec4f8dcee9882ed Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Wed, 14 Mar 2007 16:13:23 +0000
Subject: [PATCH] Add patch in to eliminate sequence without taxonomic data

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@9 60f365c0-8329-0410-b2a4-ec073aeeaa1d
---
 tools/ecoPCRFormat.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/ecoPCRFormat.py b/tools/ecoPCRFormat.py
index 1de51e8..c09caf2 100755
--- a/tools/ecoPCRFormat.py
+++ b/tools/ecoPCRFormat.py
@@ -249,7 +249,10 @@ def genbankParser(entry):
     Id = _gbParseID.findall(entry)[0]
     De = ' '.join(_gbParseDE.findall(entry)[0].split())
     Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper())
-    Tx = int(_gbParseTX.findall(entry)[0])
+    try:
+        Tx = int(_gbParseTX.findall(entry)[0])
+    except IndexError:
+        Tx = None
     return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
 
 def sequenceIterator(file,parser):
@@ -354,15 +357,18 @@ def ecoSeqWriter(file,input,taxindex,parser=genbankParser):
 
     progressBar(1, inputsize,reset=True)
     for entry in entries:
-        entry['taxid']=taxindex[entry['taxid']]
         if entry['taxid'] is not None:
-            seqcount+=1
-            output.write(ecoSeqPacker(entry))
+            entry['taxid']=taxindex[entry['taxid']]
+            if entry['taxid'] is not None:
+                seqcount+=1
+                output.write(ecoSeqPacker(entry))
+            else:
+                skipped.append[entry['id']]
+            where = universalTell(input)
+            progressBar(where, inputsize)
+            print >>sys.stderr," Readed sequences : %d " % seqcount,
         else:
             skipped.append[entry['id']]
-        where = universalTell(input)
-        progressBar(where, inputsize)
-        print >>sys.stderr," Read sequences : %d " % seqcount,
 
     print >>sys.stderr
     output.seek(0,0)