Cython API: Various fixes in input handlers (parsers, openers, etc.).
Mostly working, but not bug-free.
This commit is contained in:
@ -6,7 +6,9 @@ Created on 30 mars 2016
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from obitools3.dms.obiseq import Nuc_Seq
|
import types
|
||||||
|
|
||||||
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
|
|
||||||
|
|
||||||
def fastaIterator(lineiterator,
|
def fastaIterator(lineiterator,
|
||||||
@ -48,7 +50,7 @@ def fastaIterator(lineiterator,
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
if read >= ionly:
|
if ionly >= 0 and read >= ionly:
|
||||||
break
|
break
|
||||||
|
|
||||||
while skipped < skip :
|
while skipped < skip :
|
||||||
@ -79,7 +81,7 @@ def fastaIterator(lineiterator,
|
|||||||
# definition,
|
# definition,
|
||||||
# tags=tags,
|
# tags=tags,
|
||||||
# )
|
# )
|
||||||
|
# TODO
|
||||||
yield { "id" : ident,
|
yield { "id" : ident,
|
||||||
"definition" : definition,
|
"definition" : definition,
|
||||||
"sequence" : sequence,
|
"sequence" : sequence,
|
||||||
@ -105,64 +107,64 @@ def fastaNucIterator(lineiterator,
|
|||||||
cdef list s
|
cdef list s
|
||||||
cdef bytes sequence
|
cdef bytes sequence
|
||||||
cdef int lines_to_skip, ionly, read
|
cdef int lines_to_skip, ionly, read
|
||||||
# cdef OBI_Seq seq
|
cdef Nuc_Seq seq
|
||||||
|
|
||||||
if only is None:
|
if only is None:
|
||||||
ionly=-1
|
ionly = -1
|
||||||
else:
|
else:
|
||||||
ionly=int(only)
|
ionly = int(only)
|
||||||
|
|
||||||
if isinstance(lineiterator,(str,bytes)):
|
if isinstance(lineiterator, (str, bytes)):
|
||||||
lineiterator=uopen(lineiterator)
|
lineiterator=uopen(lineiterator)
|
||||||
|
|
||||||
|
if isinstance(lineiterator, types.GeneratorType):
|
||||||
|
iterator = lineiterator
|
||||||
if isinstance(lineiterator, LineBuffer):
|
if isinstance(lineiterator, LineBuffer):
|
||||||
lb=lineiterator
|
iterator = iter(lineiterator)
|
||||||
else:
|
else:
|
||||||
lb=LineBuffer(lineiterator,buffersize)
|
iterator = iter(LineBuffer(lineiterator, buffersize))
|
||||||
|
|
||||||
|
|
||||||
skipped = 0
|
skipped = 0
|
||||||
read = 0
|
read = 0
|
||||||
i = iter(lb)
|
|
||||||
|
|
||||||
if firstline is None:
|
if firstline is None:
|
||||||
line = next(i)
|
line = next(iterator)
|
||||||
else:
|
else:
|
||||||
line = firstline
|
line = firstline
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
if read >= ionly:
|
if ionly >= 0 and read >= ionly:
|
||||||
break
|
break
|
||||||
|
|
||||||
while skipped < skip :
|
while skipped < skip :
|
||||||
line = next(i)
|
line = next(iterator)
|
||||||
try:
|
try:
|
||||||
while line[0]!='>':
|
while line[0]!='>':
|
||||||
line = next(i)
|
line = next(iterator)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass
|
pass
|
||||||
skipped += 1
|
skipped += 1
|
||||||
|
|
||||||
ident,tags,definition = parseHeader(line)
|
ident,tags,definition = parseHeader(line)
|
||||||
s = []
|
s = []
|
||||||
line = next(i)
|
line = next(iterator)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while line[0]!='>':
|
while line[0]!='>':
|
||||||
s.append(str2bytes(line)[0:-1])
|
s.append(str2bytes(line)[0:-1])
|
||||||
line = next(i)
|
line = next(iterator)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
sequence = b"".join(s)
|
sequence = b"".join(s)
|
||||||
|
|
||||||
# seq =
|
|
||||||
seq = Nuc_Seq(ident,
|
seq = Nuc_Seq(ident,
|
||||||
sequence,
|
sequence,
|
||||||
definition,
|
definition=definition,
|
||||||
None,-1,
|
quality=None,
|
||||||
tags)
|
offset=-1,
|
||||||
|
tags=tags)
|
||||||
|
|
||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ Created on 30 mars 2016
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from obitools3.dms.obiseq import Nuc_Seq
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
|
|
||||||
|
|
||||||
def fastqIterator(lineiterator,
|
def fastqIterator(lineiterator,
|
||||||
@ -74,10 +74,9 @@ def fastqWithQualityIterator(lineiterator,
|
|||||||
else:
|
else:
|
||||||
hline = firstline
|
hline = firstline
|
||||||
|
|
||||||
|
|
||||||
for line in i:
|
for line in i:
|
||||||
|
|
||||||
if read >= ionly:
|
if ionly >= 0 and read >= ionly:
|
||||||
break
|
break
|
||||||
|
|
||||||
ident,tags,definition = parseHeader(hline)
|
ident,tags,definition = parseHeader(hline)
|
||||||
@ -87,9 +86,10 @@ def fastqWithQualityIterator(lineiterator,
|
|||||||
|
|
||||||
seq = Nuc_Seq(ident,
|
seq = Nuc_Seq(ident,
|
||||||
sequence,
|
sequence,
|
||||||
definition,
|
definition=definition,
|
||||||
quality,qualityoffset,
|
quality=quality,
|
||||||
tags)
|
offset=qualityoffset,
|
||||||
|
tags=tags)
|
||||||
|
|
||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
@ -152,7 +152,7 @@ def fastqWithoutQualityIterator(lineiterator,
|
|||||||
|
|
||||||
for line in i:
|
for line in i:
|
||||||
|
|
||||||
if read >= ionly:
|
if ionly >= 0 and read >= ionly:
|
||||||
break
|
break
|
||||||
|
|
||||||
ident,tags,definition = parseHeader(hline)
|
ident,tags,definition = parseHeader(hline)
|
||||||
@ -162,9 +162,10 @@ def fastqWithoutQualityIterator(lineiterator,
|
|||||||
|
|
||||||
seq = Nuc_Seq(ident,
|
seq = Nuc_Seq(ident,
|
||||||
sequence,
|
sequence,
|
||||||
definition,
|
definition=definition,
|
||||||
None,-1,
|
quality=None,
|
||||||
tags)
|
offset=-1,
|
||||||
|
tags=tags)
|
||||||
|
|
||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ def entryIteratorFactory(lineiterator,
|
|||||||
if isinstance(lineiterator, LineBuffer):
|
if isinstance(lineiterator, LineBuffer):
|
||||||
lb=lineiterator
|
lb=lineiterator
|
||||||
else:
|
else:
|
||||||
lb=LineBuffer(lineiterator,buffersize)
|
lb=LineBuffer(lineiterator, buffersize)
|
||||||
|
|
||||||
i = iter(lb)
|
i = iter(lb)
|
||||||
|
|
||||||
@ -62,25 +62,28 @@ def entryIteratorFactory(lineiterator,
|
|||||||
elif is_ngsfilter_line(first):
|
elif is_ngsfilter_line(first):
|
||||||
format=b"ngsfilter"
|
format=b"ngsfilter"
|
||||||
|
|
||||||
|
# TODO Temporary fix
|
||||||
|
first=None
|
||||||
|
lineiterator.seek(0)
|
||||||
|
|
||||||
if format==b'fasta':
|
if format==b'fasta':
|
||||||
if seqtype == b'nuc':
|
if seqtype == b'nuc':
|
||||||
return (fastaNucIterator(lineiterator,
|
return (fastaNucIterator(lineiterator,
|
||||||
skip,only,
|
skip=skip,only=only,
|
||||||
first),
|
firstline=first,
|
||||||
|
buffersize=buffersize),
|
||||||
Nuc_Seq)
|
Nuc_Seq)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
elif format==b'fastq':
|
elif format==b'fastq':
|
||||||
return (fastqIterator(lineiterator,
|
return (fastqIterator(lineiterator,
|
||||||
skip,only,
|
skip=skip,only=only,
|
||||||
qualityoffset,
|
qualityoffset=qualityoffset,
|
||||||
first),
|
noquality=noquality,
|
||||||
|
firstline=first,
|
||||||
|
buffersize=buffersize),
|
||||||
Nuc_Seq)
|
Nuc_Seq)
|
||||||
|
|
||||||
|
|
||||||
raise NotImplementedError('File format not yet implemented')
|
raise NotImplementedError('File format not yet implemented')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -364,19 +364,22 @@ def open_uri(uri,
|
|||||||
if qualifiers[b"seqtype"]==b"nuc":
|
if qualifiers[b"seqtype"]==b"nuc":
|
||||||
objclass = Nuc_Seq
|
objclass = Nuc_Seq
|
||||||
if format==b"fasta":
|
if format==b"fasta":
|
||||||
iseq = fastaNucIterator(file,skip,only)
|
iseq = fastaNucIterator(file,
|
||||||
|
skip=skip,
|
||||||
|
only=only)
|
||||||
elif format==b"fastq":
|
elif format==b"fastq":
|
||||||
iseq = fastqIterator(file,
|
iseq = fastqIterator(file,
|
||||||
skip,only,
|
skip=skip,
|
||||||
offset,
|
only=only,
|
||||||
noquality)
|
offset=offset,
|
||||||
|
noquality=noquality)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError('Sequence file format not implemented')
|
raise NotImplementedError('Sequence file format not implemented')
|
||||||
elif qualifiers[b"seqtype"]==b"prot":
|
elif qualifiers[b"seqtype"]==b"prot":
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
else:
|
else:
|
||||||
iseq,objclass = entryIteratorFactory(file,
|
iseq,objclass = entryIteratorFactory(file,
|
||||||
skip,only,
|
skip, only,
|
||||||
seqtype,
|
seqtype,
|
||||||
offset,
|
offset,
|
||||||
noquality,
|
noquality,
|
||||||
@ -389,10 +392,9 @@ def open_uri(uri,
|
|||||||
blanklineskip,
|
blanklineskip,
|
||||||
commentchar)
|
commentchar)
|
||||||
|
|
||||||
|
#tmpdms = get_temp_dms()
|
||||||
|
|
||||||
tmpdms = get_temp_dms()
|
return (file, iseq, objclass, urib)
|
||||||
|
|
||||||
return (file,iseq,objclass,urib)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user