This commit is contained in:
2007-06-21 13:58:19 +00:00
parent 24f958ce1f
commit 379ca7988b

View File

@@ -294,25 +294,25 @@ def emblEntryParser(entry):
######################
_fastaParseID = re.compile('(?<=^>)[^ ]+')
_fastaParseDE = re.compile('(?<=^>).+',)
_fastaParseSQ = re.compile('^[^>].+',re.MULTILINE+re.DOTALL)
_fastaParseTX = re.compile('(?<=[[Tt]axon:) *[0-9]+ *(?=])')
def parseFasta(seq):
title = seq[0].strip()[1:].split(None,1)
id=title[0]
if len(title) == 2:
field = title[1].split('; ')
else:
field=[]
info = dict(x.split('=') for x in field if '=' in x)
definition = ' '.join([x for x in field if '=' not in x])
seq=(''.join([x.strip() for x in seq[1:]])).upper()
return id,seq,definition,info
def fastaEntryParser(entry):
# NOTE(review): this text appears to come from a rendered diff whose +/- markers
# and indentation were lost, so two alternative bodies seem to be concatenated
# here -- confirm against the repository which one is current.
#
# --- Regex-based body: uses `entry` as a single string holding one record. ---
# Identifier: first match of the module-level ID pattern.
Id = _fastaParseID.findall(entry)[0]
# Header text minus its first whitespace-delimited word (empty list if nothing follows).
De = _fastaParseDE.findall(entry)[0].split(None,1)[1:]
if not De:
De=''
else:
De=De[0]
# Sequence text, upper-cased and passed through cleanSeq (defined outside this view).
Sq = cleanSeq(_fastaParseSQ.findall(entry)[0].upper())
# Taxonomic id is optional: no match means findall()[0] raises IndexError -> None.
try:
Tx = int(_fastaParseTX.findall(entry)[0])
except IndexError:
Tx = None
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
# --- parseFasta-based body: hands `entry` to parseFasta, which indexes it as a
# sequence of lines (header first). ---
id,seq,definition,info = parseFasta(entry)
# 'taxid' is read from the header annotations and converted to int when present.
Tx = info.get('taxid',None)
if Tx is not None:
Tx=int(Tx)
return {'id':id,'taxid':Tx,'definition':definition,'sequence':seq}