143 lines
3.3 KiB
Cython
143 lines
3.3 KiB
Cython
#cython: language_level=3
|
|
|
|
'''
|
|
Created on 30 March 2016
|
|
|
|
@author: coissac
|
|
'''
|
|
|
|
#from obitools3.dms._obiseq cimport OBI_Seq
|
|
|
|
|
|
def fastaIterator(lineiterator,
                  int buffersize=100000000,
                  int skip=0
                  ):
    """
    Iterate over the records of a FASTA formatted source.

    The first line read is handed to ``parseHeader`` without any check, so
    the source is expected to start with a header line. Every following
    line whose first character is not ``'>'`` is treated as sequence data
    of the current record.

    Parameters
    ----------
    lineiterator
        Either a ``LineBuffer`` (used as is), a ``str``/``bytes`` value
        (passed to ``uopen`` first -- presumably a file name, confirm
        against ``uopen``), or any other object accepted by the
        ``LineBuffer`` constructor.
    buffersize
        Forwarded to ``LineBuffer`` when one has to be created.
    skip
        Number of leading records to discard before yielding.

    Yields
    ------
    dict
        One dictionary per record with the keys ``"id"``, ``"definition"``
        and ``"tags"`` (as returned by ``parseHeader``), ``"sequence"``
        (the concatenated sequence lines as ``bytes``), ``"quality"``
        (always ``None``) and ``"annotation"`` (a new empty ``dict``).

    Notes
    -----
    End of input is detected explicitly with ``next(i, None)``. Letting
    ``StopIteration`` escape from a generator body is turned into a
    ``RuntimeError`` under PEP 479 semantics, and the previous
    implementation also re-parsed a stale sequence line as a header
    before running into the exhausted iterator. A last record consisting
    of a header line only is now yielded with an empty sequence instead
    of being dropped.
    """
    cdef LineBuffer lb
    cdef str ident
    cdef str definition
    cdef dict tags
    cdef list s
    cdef bytes sequence
    cdef bytes quality
    cdef int skipped

    # TODO(review): an earlier draft built an OBI_Seq here instead of a
    # dict; revisit once that type is available.

    if isinstance(lineiterator, (str, bytes)):
        lineiterator = uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb = lineiterator
    else:
        lb = LineBuffer(lineiterator, buffersize)

    i = iter(lb)

    # ``None`` marks the end of the input; an empty source yields nothing.
    line = next(i, None)

    # Discard the first ``skip`` records: step over the current header,
    # then advance until the next header line (or the end of the input).
    skipped = 0
    while line is not None and skipped < skip:
        line = next(i, None)
        while line is not None and line[0] != '>':
            line = next(i, None)
        skipped += 1

    # Invariant: at the top of the loop ``line`` is a header line.
    while line is not None:
        ident, tags, definition = parseHeader(line)

        s = []
        line = next(i, None)
        while line is not None and line[0] != '>':
            # Remove only the line terminator: a final line lacking a
            # trailing newline must keep its last symbol.
            s.append(str2bytes(line).rstrip(b"\r\n"))
            line = next(i, None)

        sequence = b"".join(s)
        quality = None      # FASTA carries no quality information

        yield {"id": ident,
               "definition": definition,
               "sequence": sequence,
               "quality": quality,
               "tags": tags,
               "annotation": {}
               }
|
|
|
|
|
|
def fastaNucIterator(lineiterator,
                     int buffersize=100000000,
                     int skip=0
                     ):
    """
    Iterate over the records of a nucleic FASTA formatted source.

    The first line read is handed to ``parseHeader`` without any check, so
    the source is expected to start with a header line. Every following
    line whose first character is not ``'>'`` is treated as sequence data
    of the current record. No nucleotide-specific validation is performed
    on the sequence.

    Parameters
    ----------
    lineiterator
        Either a ``LineBuffer`` (used as is), a ``str``/``bytes`` value
        (passed to ``uopen`` first -- presumably a file name, confirm
        against ``uopen``), or any other object accepted by the
        ``LineBuffer`` constructor.
    buffersize
        Forwarded to ``LineBuffer`` when one has to be created.
    skip
        Number of leading records to discard before yielding.

    Yields
    ------
    dict
        One dictionary per record with the keys ``"id"``, ``"definition"``
        and ``"tags"`` (as returned by ``parseHeader``), ``"sequence"``
        (the concatenated sequence lines as ``bytes``), ``"quality"``
        (always ``None``) and ``"annotation"`` (a new empty ``dict``).

    Notes
    -----
    End of input is detected explicitly with ``next(i, None)``. Letting
    ``StopIteration`` escape from a generator body is turned into a
    ``RuntimeError`` under PEP 479 semantics, and the previous
    implementation also re-parsed a stale sequence line as a header
    before running into the exhausted iterator. A last record consisting
    of a header line only is now yielded with an empty sequence instead
    of being dropped.
    """
    cdef LineBuffer lb
    cdef str ident
    cdef str definition
    cdef dict tags
    cdef list s
    cdef bytes sequence
    cdef bytes quality
    cdef int skipped

    if isinstance(lineiterator, (str, bytes)):
        lineiterator = uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb = lineiterator
    else:
        lb = LineBuffer(lineiterator, buffersize)

    i = iter(lb)

    # ``None`` marks the end of the input; an empty source yields nothing.
    line = next(i, None)

    # Discard the first ``skip`` records: step over the current header,
    # then advance until the next header line (or the end of the input).
    skipped = 0
    while line is not None and skipped < skip:
        line = next(i, None)
        while line is not None and line[0] != '>':
            line = next(i, None)
        skipped += 1

    # Invariant: at the top of the loop ``line`` is a header line.
    while line is not None:
        ident, tags, definition = parseHeader(line)

        s = []
        line = next(i, None)
        while line is not None and line[0] != '>':
            # Remove only the line terminator: a final line lacking a
            # trailing newline must keep its last symbol.
            s.append(str2bytes(line).rstrip(b"\r\n"))
            line = next(i, None)

        sequence = b"".join(s)
        quality = None      # FASTA carries no quality information

        yield {"id": ident,
               "definition": definition,
               "sequence": sequence,
               "quality": quality,
               "tags": tags,
               "annotation": {}
               }
|
|
|
|
|
|
|
|
|