Add a patch to skip sequences without taxonomic data
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@9 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -249,7 +249,10 @@ def genbankParser(entry):
|
|||||||
Id = _gbParseID.findall(entry)[0]
|
Id = _gbParseID.findall(entry)[0]
|
||||||
De = ' '.join(_gbParseDE.findall(entry)[0].split())
|
De = ' '.join(_gbParseDE.findall(entry)[0].split())
|
||||||
Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper())
|
Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper())
|
||||||
|
try:
|
||||||
Tx = int(_gbParseTX.findall(entry)[0])
|
Tx = int(_gbParseTX.findall(entry)[0])
|
||||||
|
except IndexError:
|
||||||
|
Tx = None
|
||||||
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
|
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
|
||||||
|
|
||||||
def sequenceIterator(file,parser):
|
def sequenceIterator(file,parser):
|
||||||
@ -354,6 +357,7 @@ def ecoSeqWriter(file,input,taxindex,parser=genbankParser):
|
|||||||
|
|
||||||
progressBar(1, inputsize,reset=True)
|
progressBar(1, inputsize,reset=True)
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
|
if entry['taxid'] is not None:
|
||||||
entry['taxid']=taxindex[entry['taxid']]
|
entry['taxid']=taxindex[entry['taxid']]
|
||||||
if entry['taxid'] is not None:
|
if entry['taxid'] is not None:
|
||||||
seqcount+=1
|
seqcount+=1
|
||||||
@ -362,7 +366,9 @@ def ecoSeqWriter(file,input,taxindex,parser=genbankParser):
|
|||||||
skipped.append[entry['id']]
|
skipped.append[entry['id']]
|
||||||
where = universalTell(input)
|
where = universalTell(input)
|
||||||
progressBar(where, inputsize)
|
progressBar(where, inputsize)
|
||||||
print >>sys.stderr," Read sequences : %d " % seqcount,
|
print >>sys.stderr," Readed sequences : %d " % seqcount,
|
||||||
|
else:
|
||||||
|
skipped.append[entry['id']]
|
||||||
|
|
||||||
print >>sys.stderr
|
print >>sys.stderr
|
||||||
output.seek(0,0)
|
output.seek(0,0)
|
||||||
|
Reference in New Issue
Block a user