From ec65f00cf2caaaebe0538e6ff9064bd631b71c61 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 27 Jul 2017 16:05:30 +0200 Subject: [PATCH] Complete the fasta iterator to manage new input options --- python/obitools3/parsers/fasta.pyx | 87 +++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/python/obitools3/parsers/fasta.pyx b/python/obitools3/parsers/fasta.pyx index 38ac738..b2f0b43 100644 --- a/python/obitools3/parsers/fasta.pyx +++ b/python/obitools3/parsers/fasta.pyx @@ -6,12 +6,14 @@ Created on 30 mars 2016 @author: coissac ''' -#from obitools3.dms._obiseq cimport OBI_Seq +from obitools3.dms.obiseq import Nuc_Seq def fastaIterator(lineiterator, - int buffersize=100000000, - int skip=0 + int skip=0, + only=None, + firstline=None, + int buffersize=100000000 ): cdef LineBuffer lb cdef str ident @@ -19,10 +21,14 @@ def fastaIterator(lineiterator, cdef dict tags cdef list s cdef bytes sequence - cdef bytes quality - cdef int skipped + cdef int skipped, ionly, read # cdef OBI_Seq seq + if only is None: + ionly=-1 + else: + ionly=int(only) + if isinstance(lineiterator,(str,bytes)): lineiterator=uopen(lineiterator) @@ -33,9 +39,17 @@ def fastaIterator(lineiterator, skipped = 0 i = iter(lb) - line = next(i) + + if firstline is None: + line = next(i) + else: + line = firstline + while True: + + if read >= ionly: + break while skipped < skip : line = next(i) @@ -59,7 +73,6 @@ def fastaIterator(lineiterator, pass sequence = b"".join(s) - quality = None # seq = OBI_Seq(id, # sequence, @@ -70,26 +83,35 @@ def fastaIterator(lineiterator, yield { "id" : ident, "definition" : definition, "sequence" : sequence, - "quality" : quality, + "quality" : None, + "offset" : None, "tags" : tags, "annotation" : {} } + + read+=1 def fastaNucIterator(lineiterator, - int buffersize=100000000, - int skip=0 - ): + int skip=0, + only=None, + firstline=None, + int buffersize=100000000 + ): cdef LineBuffer lb cdef str ident cdef str definition cdef dict tags cdef list s cdef bytes sequence - cdef bytes quality - cdef int skipped + cdef int lines_to_skip, ionly, read # cdef OBI_Seq seq + if only is None: + ionly=-1 + else: + ionly=int(only) + if isinstance(lineiterator,(str,bytes)): lineiterator=uopen(lineiterator) @@ -100,11 +122,19 @@ def fastaNucIterator(lineiterator, skipped = 0 + read = 0 i = iter(lb) - line = next(i) + if firstline is None: + line = next(i) + else: + line = firstline + while True: + if read >= ionly: + break + while skipped < skip : line = next(i) try: @@ -125,18 +155,27 @@ def fastaNucIterator(lineiterator, except StopIteration: pass - sequence = b"".join(s) - quality = None - + sequence = b"".join(s) # seq = - yield { "id" : ident, - "definition" : definition, - "sequence" : sequence, - "quality" : quality, - "tags" : tags, - "annotation" : {} - } + seq = Nuc_Seq(ident, + sequence, + definition, + None,-1, + tags) + + yield seq + +# yield { "id" : ident, +# "definition" : definition, +# "sequence" : sequence, +# "quality" : None, +# "offset" : None, +# "tags" : tags, +# "annotation" : {} +# } + + read+=1