#cython: language_level=3

'''
Created on 30 March 2016

@author: coissac
'''

from .header cimport parseHeader
from ..files.universalopener cimport uopen
from ..files.linebuffer cimport LineBuffer


def fastaIterator(lineiterator, int buffersize=100000000):
    '''
    Iterate over the records of a FASTA stream.

    @param lineiterator: the source of lines. A `str` or `bytes` value is
                         handed to `uopen`; a `LineBuffer` is used as is;
                         anything else is wrapped in
                         `LineBuffer(lineiterator, buffersize)`.
    @param buffersize: size passed to `LineBuffer` when the source has to
                       be wrapped.

    @return: a generator yielding one `dict` per record with the keys
             `id`, `definition`, `sequence`, `quality` (always `None`),
             `tags` and `annotation` (always a new empty `dict`).

    The first line read is handed to `parseHeader` as the header of the
    first record; each following line starting with `>` opens a new record.
    Empty input yields nothing.
    '''
    cdef LineBuffer lb
    cdef str ident
    cdef str definition
    cdef dict tags
    cdef list s

    if isinstance(lineiterator, (str, bytes)):
        lineiterator = uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb = lineiterator
    else:
        lb = LineBuffer(lineiterator, buffersize)

    i = iter(lb)

    # `None` marks the end of the input. Using a sentinel instead of letting
    # StopIteration escape from next() guarantees that the record being
    # accumulated when the input ends is still yielded.
    line = next(i, None)

    while line is not None:
        ident, tags, definition = parseHeader(line)
        s = []
        line = next(i, None)

        # Gather sequence lines up to the next header or the end of input.
        # startswith() is safe on an empty string, unlike line[0].
        while line is not None and not line.startswith('>'):
            # Remove only the line terminator, so a last line lacking a
            # trailing newline keeps its final residue.
            s.append(line.rstrip('\r\n'))
            line = next(i, None)

        sequence = "".join(s)
        quality = None

        yield { "id"         : ident,
                "definition" : definition,
                "sequence"   : sequence,
                "quality"    : quality,
                "tags"       : tags,
                "annotation" : {}
              }