Complete the fasta iterator to manage new input options

This commit is contained in:
2017-07-27 16:05:30 +02:00
parent 8d9cdb4d03
commit ec65f00cf2

View File

@ -6,12 +6,14 @@ Created on 30 mars 2016
@author: coissac @author: coissac
''' '''
#from obitools3.dms._obiseq cimport OBI_Seq from obitools3.dms.obiseq import Nuc_Seq
def fastaIterator(lineiterator, def fastaIterator(lineiterator,
int buffersize=100000000, int skip=0,
int skip=0 only=None,
firstline=None,
int buffersize=100000000
): ):
cdef LineBuffer lb cdef LineBuffer lb
cdef str ident cdef str ident
@ -19,10 +21,14 @@ def fastaIterator(lineiterator,
cdef dict tags cdef dict tags
cdef list s cdef list s
cdef bytes sequence cdef bytes sequence
cdef bytes quality cdef int skipped, ionly, read
cdef int skipped
# cdef OBI_Seq seq # cdef OBI_Seq seq
if only is None:
ionly=-1
else:
ionly=int(only)
if isinstance(lineiterator,(str,bytes)): if isinstance(lineiterator,(str,bytes)):
lineiterator=uopen(lineiterator) lineiterator=uopen(lineiterator)
@ -33,9 +39,17 @@ def fastaIterator(lineiterator,
skipped = 0 skipped = 0
i = iter(lb) i = iter(lb)
line = next(i)
if firstline is None:
line = next(i)
else:
line = firstline
while True: while True:
if read >= ionly:
break
while skipped < skip : while skipped < skip :
line = next(i) line = next(i)
@ -59,7 +73,6 @@ def fastaIterator(lineiterator,
pass pass
sequence = b"".join(s) sequence = b"".join(s)
quality = None
# seq = OBI_Seq(id, # seq = OBI_Seq(id,
# sequence, # sequence,
@ -70,26 +83,35 @@ def fastaIterator(lineiterator,
yield { "id" : ident, yield { "id" : ident,
"definition" : definition, "definition" : definition,
"sequence" : sequence, "sequence" : sequence,
"quality" : quality, "quality" : None,
"offset" : None,
"tags" : tags, "tags" : tags,
"annotation" : {} "annotation" : {}
} }
read+=1
def fastaNucIterator(lineiterator, def fastaNucIterator(lineiterator,
int buffersize=100000000, int skip=0,
int skip=0 only=None,
): firstline=None,
int buffersize=100000000
):
cdef LineBuffer lb cdef LineBuffer lb
cdef str ident cdef str ident
cdef str definition cdef str definition
cdef dict tags cdef dict tags
cdef list s cdef list s
cdef bytes sequence cdef bytes sequence
cdef bytes quality cdef int lines_to_skip, ionly, read
cdef int skipped
# cdef OBI_Seq seq # cdef OBI_Seq seq
if only is None:
ionly=-1
else:
ionly=int(only)
if isinstance(lineiterator,(str,bytes)): if isinstance(lineiterator,(str,bytes)):
lineiterator=uopen(lineiterator) lineiterator=uopen(lineiterator)
@ -100,11 +122,19 @@ def fastaNucIterator(lineiterator,
skipped = 0 skipped = 0
read = 0
i = iter(lb) i = iter(lb)
line = next(i)
if firstline is None:
line = next(i)
else:
line = firstline
while True: while True:
if read >= ionly:
break
while skipped < skip : while skipped < skip :
line = next(i) line = next(i)
try: try:
@ -125,18 +155,27 @@ def fastaNucIterator(lineiterator,
except StopIteration: except StopIteration:
pass pass
sequence = b"".join(s) sequence = b"".join(s)
quality = None
# seq = # seq =
yield { "id" : ident, seq = Nuc_Seq(ident,
"definition" : definition, sequence,
"sequence" : sequence, definition,
"quality" : quality, None,-1,
"tags" : tags, tags)
"annotation" : {}
} yield seq
# yield { "id" : ident,
# "definition" : definition,
# "sequence" : sequence,
# "quality" : None,
# "offset" : None,
# "tags" : tags,
# "annotation" : {}
# }
read+=1