Extend the FASTA iterators to handle the new input options (`only`, `firstline`)

This commit is contained in:
2017-07-27 16:05:30 +02:00
parent 8d9cdb4d03
commit ec65f00cf2

View File

@ -6,12 +6,14 @@ Created on 30 mars 2016
@author: coissac
'''
#from obitools3.dms._obiseq cimport OBI_Seq
from obitools3.dms.obiseq import Nuc_Seq
def fastaIterator(lineiterator,
int buffersize=100000000,
int skip=0
int skip=0,
only=None,
firstline=None,
int buffersize=100000000
):
cdef LineBuffer lb
cdef str ident
@ -19,10 +21,14 @@ def fastaIterator(lineiterator,
cdef dict tags
cdef list s
cdef bytes sequence
cdef bytes quality
cdef int skipped
cdef int skipped, ionly, read
# cdef OBI_Seq seq
if only is None:
ionly=-1
else:
ionly=int(only)
if isinstance(lineiterator,(str,bytes)):
lineiterator=uopen(lineiterator)
@ -33,10 +39,18 @@ def fastaIterator(lineiterator,
skipped = 0
i = iter(lb)
if firstline is None:
line = next(i)
else:
line = firstline
while True:
if read >= ionly:
break
while skipped < skip :
line = next(i)
try:
@ -59,7 +73,6 @@ def fastaIterator(lineiterator,
pass
sequence = b"".join(s)
quality = None
# seq = OBI_Seq(id,
# sequence,
@ -70,15 +83,20 @@ def fastaIterator(lineiterator,
yield { "id" : ident,
"definition" : definition,
"sequence" : sequence,
"quality" : quality,
"quality" : None,
"offset" : None,
"tags" : tags,
"annotation" : {}
}
read+=1
def fastaNucIterator(lineiterator,
int buffersize=100000000,
int skip=0
int skip=0,
only=None,
firstline=None,
int buffersize=100000000
):
cdef LineBuffer lb
cdef str ident
@ -86,10 +104,14 @@ def fastaNucIterator(lineiterator,
cdef dict tags
cdef list s
cdef bytes sequence
cdef bytes quality
cdef int skipped
cdef int lines_to_skip, ionly, read
# cdef OBI_Seq seq
if only is None:
ionly=-1
else:
ionly=int(only)
if isinstance(lineiterator,(str,bytes)):
lineiterator=uopen(lineiterator)
@ -100,11 +122,19 @@ def fastaNucIterator(lineiterator,
skipped = 0
read = 0
i = iter(lb)
if firstline is None:
line = next(i)
else:
line = firstline
while True:
if read >= ionly:
break
while skipped < skip :
line = next(i)
try:
@ -126,17 +156,26 @@ def fastaNucIterator(lineiterator,
pass
sequence = b"".join(s)
quality = None
# seq =
yield { "id" : ident,
"definition" : definition,
"sequence" : sequence,
"quality" : quality,
"tags" : tags,
"annotation" : {}
}
seq = Nuc_Seq(ident,
sequence,
definition,
None,-1,
tags)
yield seq
# yield { "id" : ident,
# "definition" : definition,
# "sequence" : sequence,
# "quality" : None,
# "offset" : None,
# "tags" : tags,
# "annotation" : {}
# }
read+=1