Complete the fastq iterator to manage new input options
This commit is contained in:
@ -7,17 +7,46 @@ Created on 30 mars 2016
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def fastqIterator(lineiterator,
                  int skip=0,
                  only=None,
                  int qualityoffset=-1,
                  bool noquality=False,
                  firstline=None,
                  int buffersize=100000000
                  ):
    '''
    Select and return the FASTQ iterator matching the requested output.

    When `noquality` is true the call is forwarded to
    `fastqWithoutQualityIterator`; otherwise it is forwarded to
    `fastqWithQualityIterator`, which additionally receives
    `qualityoffset`. The other arguments (`lineiterator`, `skip`, `only`,
    `firstline`, `buffersize`) are passed through unchanged.

    @return: whatever the selected iterator function returns
    '''
    # Default case first: records carrying their quality string.
    if not noquality:
        return fastqWithQualityIterator(lineiterator,
                                        skip,only,
                                        qualityoffset,
                                        firstline,
                                        buffersize)

    return fastqWithoutQualityIterator(lineiterator,
                                       skip,only,
                                       firstline,
                                       buffersize)
|
||||||
|
|
||||||
|
def fastqWithQualityIterator(lineiterator,
                             int skip=0,
                             only=None,
                             int qualityoffset=-1,
                             firstline=None,
                             int buffersize=100000000
                             ):
    '''
    Iterate over FASTQ records and yield one `Nuc_Seq` per record,
    built with its quality string.

    @param lineiterator: a file name (str or bytes, opened with `uopen`),
                         a `LineBuffer`, or any object accepted by the
                         `LineBuffer` constructor
    @param skip: number of leading records to discard; each record is
                 counted as four lines
    @param only: maximum number of records to yield; `None` means
                 no limit
    @param qualityoffset: passed unchanged to `Nuc_Seq`
    @param firstline: when not `None` and `skip` is 0, used as the header
                      of the first record instead of reading it from the
                      iterator; when `skip` > 0 it counts as one of the
                      skipped lines
    @param buffersize: passed to `LineBuffer` when one has to be created
    '''
    cdef LineBuffer lb
    cdef str ident
    cdef str definition
    cdef dict tags
    cdef bytes sequence
    cdef bytes quality
    cdef int skipped, lines_to_skip, ionly, read

    # ionly is only consulted when a limit was actually requested,
    # so the -1 placeholder can never stop the iteration by itself.
    if only is None:
        ionly=-1
    else:
        ionly=int(only)

    if isinstance(lineiterator,(str,bytes)):
        lineiterator=uopen(lineiterator)

    # NOTE(review): this LineBuffer check mirrors the one in
    # fastqWithoutQualityIterator - confirm against the full file.
    if isinstance(lineiterator, LineBuffer):
        lb=lineiterator
    else:
        lb=LineBuffer(lineiterator,buffersize)

    i = iter(lb)

    # A provided first line is one of the lines of the first record,
    # so one line less has to be read from the iterator.
    lines_to_skip = skip*4 - (firstline is not None)

    for skipped in range(lines_to_skip):
        # Stop quietly when the input is shorter than the skipped part.
        if next(i,None) is None:
            break

    if skip > 0:
        firstline=None

    read=0

    if firstline is None:
        hline = next(i,None)
    else:
        hline = firstline

    # hline is None once the input is exhausted: the generator then ends
    # normally instead of letting StopIteration escape from its body.
    while hline is not None and (only is None or read < ionly):

        line = next(i,None)
        if line is None:
            # header without a sequence line: nothing more to yield
            break

        ident,tags,definition = parseHeader(hline)
        sequence = str2bytes(line[0:-1])
        next(i)                                  # third line of the record is ignored
        quality = str2bytes(next(i)[0:-1])

        seq = Nuc_Seq(ident,
                      sequence,
                      definition,
                      quality,qualityoffset,
                      tags)

        yield seq

        read+=1
        hline = next(i,None)
|
def fastqWithoutQualityIterator(lineiterator,
                                int skip=0,
                                only=None,
                                firstline=None,
                                int buffersize=100000000
                                ):
    '''
    Iterate over FASTQ records and yield one `Nuc_Seq` per record,
    built without quality (`None` and -1 are passed as quality and
    quality offset).

    @param lineiterator: a file name (str or bytes, opened with `uopen`),
                         a `LineBuffer`, or any object accepted by the
                         `LineBuffer` constructor
    @param skip: number of leading records to discard; each record is
                 counted as four lines
    @param only: maximum number of records to yield; `None` means
                 no limit
    @param firstline: when not `None` and `skip` is 0, used as the header
                      of the first record instead of reading it from the
                      iterator; when `skip` > 0 it counts as one of the
                      skipped lines
    @param buffersize: passed to `LineBuffer` when one has to be created
    '''
    cdef LineBuffer lb
    cdef str ident
    cdef str definition
    cdef dict tags
    cdef bytes sequence
    cdef int skipped, lines_to_skip, ionly, read

    # ionly is only consulted when a limit was actually requested,
    # so the -1 placeholder can never stop the iteration by itself.
    if only is None:
        ionly=-1
    else:
        ionly=int(only)

    if isinstance(lineiterator,(str,bytes)):
        lineiterator=uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb=lineiterator
    else:
        lb=LineBuffer(lineiterator,buffersize)

    i = iter(lb)

    # A provided first line is one of the lines of the first record,
    # so one line less has to be read from the iterator.
    lines_to_skip = skip*4 - (firstline is not None)

    for skipped in range(lines_to_skip):
        # Stop quietly when the input is shorter than the skipped part.
        if next(i,None) is None:
            break

    if skip > 0:
        firstline=None

    read=0

    if firstline is None:
        hline = next(i,None)
    else:
        hline = firstline

    # hline is None once the input is exhausted: the generator then ends
    # normally instead of letting StopIteration escape from its body.
    while hline is not None and (only is None or read < ionly):

        line = next(i,None)
        if line is None:
            # header without a sequence line: nothing more to yield
            break

        ident,tags,definition = parseHeader(hline)
        sequence = str2bytes(line[0:-1])
        next(i)                                  # third line of the record is ignored
        next(i)                                  # quality line is read and discarded

        seq = Nuc_Seq(ident,
                      sequence,
                      definition,
                      None,-1,
                      tags)

        yield seq

        read+=1
        hline = next(i,None)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user