Complete the fasta iterator to manage new input options
This commit is contained in:
@ -6,12 +6,14 @@ Created on 30 mars 2016
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
|
|
||||||
#from obitools3.dms._obiseq cimport OBI_Seq
|
from obitools3.dms.obiseq import Nuc_Seq
|
||||||
|
|
||||||
|
|
||||||
def fastaIterator(lineiterator,
|
def fastaIterator(lineiterator,
|
||||||
int buffersize=100000000,
|
int skip=0,
|
||||||
int skip=0
|
only=None,
|
||||||
|
firstline=None,
|
||||||
|
int buffersize=100000000
|
||||||
):
|
):
|
||||||
cdef LineBuffer lb
|
cdef LineBuffer lb
|
||||||
cdef str ident
|
cdef str ident
|
||||||
@ -19,10 +21,14 @@ def fastaIterator(lineiterator,
|
|||||||
cdef dict tags
|
cdef dict tags
|
||||||
cdef list s
|
cdef list s
|
||||||
cdef bytes sequence
|
cdef bytes sequence
|
||||||
cdef bytes quality
|
cdef int skipped, ionly, read
|
||||||
cdef int skipped
|
|
||||||
# cdef OBI_Seq seq
|
# cdef OBI_Seq seq
|
||||||
|
|
||||||
|
if only is None:
|
||||||
|
ionly=-1
|
||||||
|
else:
|
||||||
|
ionly=int(only)
|
||||||
|
|
||||||
if isinstance(lineiterator,(str,bytes)):
|
if isinstance(lineiterator,(str,bytes)):
|
||||||
lineiterator=uopen(lineiterator)
|
lineiterator=uopen(lineiterator)
|
||||||
|
|
||||||
@ -33,9 +39,17 @@ def fastaIterator(lineiterator,
|
|||||||
|
|
||||||
skipped = 0
|
skipped = 0
|
||||||
i = iter(lb)
|
i = iter(lb)
|
||||||
line = next(i)
|
|
||||||
|
if firstline is None:
|
||||||
|
line = next(i)
|
||||||
|
else:
|
||||||
|
line = firstline
|
||||||
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
|
if read >= ionly:
|
||||||
|
break
|
||||||
|
|
||||||
while skipped < skip :
|
while skipped < skip :
|
||||||
line = next(i)
|
line = next(i)
|
||||||
@ -59,7 +73,6 @@ def fastaIterator(lineiterator,
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
sequence = b"".join(s)
|
sequence = b"".join(s)
|
||||||
quality = None
|
|
||||||
|
|
||||||
# seq = OBI_Seq(id,
|
# seq = OBI_Seq(id,
|
||||||
# sequence,
|
# sequence,
|
||||||
@ -70,26 +83,35 @@ def fastaIterator(lineiterator,
|
|||||||
yield { "id" : ident,
|
yield { "id" : ident,
|
||||||
"definition" : definition,
|
"definition" : definition,
|
||||||
"sequence" : sequence,
|
"sequence" : sequence,
|
||||||
"quality" : quality,
|
"quality" : None,
|
||||||
|
"offset" : None,
|
||||||
"tags" : tags,
|
"tags" : tags,
|
||||||
"annotation" : {}
|
"annotation" : {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
read+=1
|
||||||
|
|
||||||
|
|
||||||
def fastaNucIterator(lineiterator,
|
def fastaNucIterator(lineiterator,
|
||||||
int buffersize=100000000,
|
int skip=0,
|
||||||
int skip=0
|
only=None,
|
||||||
):
|
firstline=None,
|
||||||
|
int buffersize=100000000
|
||||||
|
):
|
||||||
cdef LineBuffer lb
|
cdef LineBuffer lb
|
||||||
cdef str ident
|
cdef str ident
|
||||||
cdef str definition
|
cdef str definition
|
||||||
cdef dict tags
|
cdef dict tags
|
||||||
cdef list s
|
cdef list s
|
||||||
cdef bytes sequence
|
cdef bytes sequence
|
||||||
cdef bytes quality
|
cdef int lines_to_skip, ionly, read
|
||||||
cdef int skipped
|
|
||||||
# cdef OBI_Seq seq
|
# cdef OBI_Seq seq
|
||||||
|
|
||||||
|
if only is None:
|
||||||
|
ionly=-1
|
||||||
|
else:
|
||||||
|
ionly=int(only)
|
||||||
|
|
||||||
if isinstance(lineiterator,(str,bytes)):
|
if isinstance(lineiterator,(str,bytes)):
|
||||||
lineiterator=uopen(lineiterator)
|
lineiterator=uopen(lineiterator)
|
||||||
|
|
||||||
@ -100,11 +122,19 @@ def fastaNucIterator(lineiterator,
|
|||||||
|
|
||||||
|
|
||||||
skipped = 0
|
skipped = 0
|
||||||
|
read = 0
|
||||||
i = iter(lb)
|
i = iter(lb)
|
||||||
line = next(i)
|
|
||||||
|
|
||||||
|
if firstline is None:
|
||||||
|
line = next(i)
|
||||||
|
else:
|
||||||
|
line = firstline
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
|
if read >= ionly:
|
||||||
|
break
|
||||||
|
|
||||||
while skipped < skip :
|
while skipped < skip :
|
||||||
line = next(i)
|
line = next(i)
|
||||||
try:
|
try:
|
||||||
@ -125,18 +155,27 @@ def fastaNucIterator(lineiterator,
|
|||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
sequence = b"".join(s)
|
sequence = b"".join(s)
|
||||||
quality = None
|
|
||||||
|
|
||||||
|
|
||||||
# seq =
|
# seq =
|
||||||
yield { "id" : ident,
|
seq = Nuc_Seq(ident,
|
||||||
"definition" : definition,
|
sequence,
|
||||||
"sequence" : sequence,
|
definition,
|
||||||
"quality" : quality,
|
None,-1,
|
||||||
"tags" : tags,
|
tags)
|
||||||
"annotation" : {}
|
|
||||||
}
|
yield seq
|
||||||
|
|
||||||
|
# yield { "id" : ident,
|
||||||
|
# "definition" : definition,
|
||||||
|
# "sequence" : sequence,
|
||||||
|
# "quality" : None,
|
||||||
|
# "offset" : None,
|
||||||
|
# "tags" : tags,
|
||||||
|
# "annotation" : {}
|
||||||
|
# }
|
||||||
|
|
||||||
|
read+=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user