diff --git a/tools/ecoPCRFormat.py b/tools/ecoPCRFormat.py index 1de51e8..c09caf2 100755 --- a/tools/ecoPCRFormat.py +++ b/tools/ecoPCRFormat.py @@ -249,7 +249,10 @@ def genbankParser(entry): Id = _gbParseID.findall(entry)[0] De = ' '.join(_gbParseDE.findall(entry)[0].split()) Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper()) - Tx = int(_gbParseTX.findall(entry)[0]) + try: + Tx = int(_gbParseTX.findall(entry)[0]) + except IndexError: + Tx = None return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq} def sequenceIterator(file,parser): @@ -354,15 +357,18 @@ def ecoSeqWriter(file,input,taxindex,parser=genbankParser): progressBar(1, inputsize,reset=True) for entry in entries: - entry['taxid']=taxindex[entry['taxid']] if entry['taxid'] is not None: - seqcount+=1 - output.write(ecoSeqPacker(entry)) + entry['taxid']=taxindex[entry['taxid']] + if entry['taxid'] is not None: + seqcount+=1 + output.write(ecoSeqPacker(entry)) + else: + skipped.append[entry['id']] + where = universalTell(input) + progressBar(where, inputsize) + print >>sys.stderr," Readed sequences : %d " % seqcount, else: skipped.append[entry['id']] - where = universalTell(input) - progressBar(where, inputsize) - print >>sys.stderr," Read sequences : %d " % seqcount, print >>sys.stderr output.seek(0,0)