This commit is contained in:
2007-06-21 13:58:19 +00:00
parent 24f958ce1f
commit 379ca7988b

View File

@ -294,25 +294,25 @@ def emblEntryParser(entry):
###################### ######################
# Patterns for regex-based extraction of FASTA header and sequence parts.
_fastaParseID = re.compile('(?<=^>)[^ ]+')
_fastaParseDE = re.compile('(?<=^>).+',)
_fastaParseSQ = re.compile('^[^>].+',re.MULTILINE+re.DOTALL)
_fastaParseTX = re.compile('(?<=[[Tt]axon:) *[0-9]+ *(?=])')


def parseFasta(seq):
    """Split one FASTA record, given as a sequence of lines, into its parts.

    The first line is the header. Its first character is dropped without
    being checked (presumably the '>' marker). The first
    whitespace-delimited token is the identifier, and the remainder is cut
    on '; ' into fields. Fields containing '=' become key/value entries;
    all other fields are joined with single spaces to form the definition.

    Parameters:
        seq: indexable sequence of strings; seq[0] is the header line and
            seq[1:] are the sequence lines.

    Returns:
        A tuple (identifier, sequence, definition, info) where sequence is
        the upper-cased concatenation of the stripped sequence lines and
        info is a dict mapping header keys to their string values.

    Raises:
        IndexError: if seq is empty or the header holds no identifier.
    """
    # Drop the leading marker, then separate the identifier from the rest.
    title = seq[0].strip()[1:].split(None, 1)
    identifier = title[0]
    if len(title) == 2:
        fields = title[1].split('; ')
    else:
        fields = []
    # Split on the first '=' only: a value may itself contain '=', and a
    # three-element split would make dict() raise ValueError.
    info = dict(x.split('=', 1) for x in fields if '=' in x)
    definition = ' '.join([x for x in fields if '=' not in x])
    sequence = ''.join([x.strip() for x in seq[1:]]).upper()
    return identifier, sequence, definition, info
def fastaEntryParser(entry):
    """Convert one FASTA entry into a dictionary record.

    The entry is handed to parseFasta. The 'taxid' value found among the
    header's key=value fields, if any, is converted to an int.

    Parameters:
        entry: the FASTA record in the form accepted by parseFasta.

    Returns:
        A dict with the keys 'id', 'taxid', 'definition' and 'sequence'.
        'taxid' is None when the header carries no taxid field.

    Raises:
        ValueError: if the taxid value is not a valid integer literal.
    """
    identifier, sequence, definition, info = parseFasta(entry)
    Tx = info.get('taxid', None)
    if Tx is not None:
        Tx = int(Tx)
    return {'id': identifier,
            'taxid': Tx,
            'definition': definition,
            'sequence': sequence}