A first version of the fasta parser

This commit is contained in:
2016-04-01 18:15:54 +02:00
parent 20b97c972b
commit 45c9c5075c
2 changed files with 54 additions and 0 deletions

View File

@ -0,0 +1,8 @@
#cython: language_level=3
from .header cimport parseHeader
from ..files.universalopener cimport uopen
from ..files.linebuffer cimport LineBuffer

View File

@ -0,0 +1,46 @@
#cython: language_level=3
'''
Created on 30 March 2016
@author: coissac
'''
def fastaIterator(lineiterator, int buffersize=100000000):
    '''
    Iterate over the records of a FASTA formatted source.

    @param lineiterator: the source of the lines. A `str` or `bytes` value
                         is passed to `uopen` to obtain the line source,
                         a `LineBuffer` instance is used as is, and any
                         other object is wrapped in a
                         `LineBuffer(lineiterator, buffersize)`.
    @param buffersize:   forwarded to the `LineBuffer` constructor when
                         the function has to build the buffer itself.

    @return: a generator yielding one `dict` per record with the keys
             "id", "definition" and "tags" (as returned by `parseHeader`
             for the header line), "sequence" (the concatenation of the
             lines following the header, without their line terminators),
             "quality" (always `None`) and "annotation" (a new empty
             `dict`).

    An empty source yields no record, and the last record of the source
    is yielded even though no further header line follows it.
    '''
    cdef LineBuffer lb
    cdef str ident
    cdef str definition
    cdef dict tags
    cdef list s

    if isinstance(lineiterator, (str, bytes)):
        lineiterator = uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb = lineiterator
    else:
        lb = LineBuffer(lineiterator, buffersize)

    i = iter(lb)

    # `None` is used as the end-of-input sentinel: letting StopIteration
    # escape from next() inside the generator body would abort the
    # generator before the pending record has been yielded.
    line = next(i, None)

    while line is not None:
        # Here `line` is the header line of the current record.
        ident, tags, definition = parseHeader(line)

        # Collect every following line up to the next header line
        # (a line starting with '>') or the end of the input.
        s = []
        line = next(i, None)

        while line is not None and not line.startswith('>'):
            # Strip only the line terminator, so that a final line
            # without a trailing newline keeps its last character and
            # CRLF terminated lines do not leave a '\r' in the sequence.
            s.append(line.rstrip('\r\n'))
            line = next(i, None)

        sequence = "".join(s)
        quality = None

        yield {"id": ident,
               "definition": definition,
               "sequence": sequence,
               "quality": quality,
               "tags": tags,
               "annotation": {}
               }