Cython API: Various fixes in input handlers (parsers, openers, etc.).

Mostly working but not bug-free
This commit is contained in:
Celine Mercier
2017-08-20 17:37:51 +02:00
parent c559ddf487
commit 74f15d1a23
4 changed files with 75 additions and 67 deletions

View File

@ -6,7 +6,9 @@ Created on 30 mars 2016
@author: coissac
'''
from obitools3.dms.obiseq import Nuc_Seq
import types
from obitools3.dms.obiseq cimport Nuc_Seq
def fastaIterator(lineiterator,
@ -48,7 +50,7 @@ def fastaIterator(lineiterator,
while True:
if read >= ionly:
if ionly >= 0 and read >= ionly:
break
while skipped < skip :
@ -79,7 +81,7 @@ def fastaIterator(lineiterator,
# definition,
# tags=tags,
# )
# TODO
yield { "id" : ident,
"definition" : definition,
"sequence" : sequence,
@ -105,7 +107,7 @@ def fastaNucIterator(lineiterator,
cdef list s
cdef bytes sequence
cdef int lines_to_skip, ionly, read
# cdef OBI_Seq seq
cdef Nuc_Seq seq
if only is None:
ionly = -1
@ -115,54 +117,54 @@ def fastaNucIterator(lineiterator,
if isinstance(lineiterator, (str, bytes)):
lineiterator=uopen(lineiterator)
if isinstance(lineiterator, types.GeneratorType):
iterator = lineiterator
if isinstance(lineiterator, LineBuffer):
lb=lineiterator
iterator = iter(lineiterator)
else:
lb=LineBuffer(lineiterator,buffersize)
iterator = iter(LineBuffer(lineiterator, buffersize))
skipped = 0
read = 0
i = iter(lb)
if firstline is None:
line = next(i)
line = next(iterator)
else:
line = firstline
while True:
if read >= ionly:
if ionly >= 0 and read >= ionly:
break
while skipped < skip :
line = next(i)
line = next(iterator)
try:
while line[0]!='>':
line = next(i)
line = next(iterator)
except StopIteration:
pass
skipped += 1
ident,tags,definition = parseHeader(line)
s = []
line = next(i)
line = next(iterator)
try:
while line[0]!='>':
s.append(str2bytes(line)[0:-1])
line = next(i)
line = next(iterator)
except StopIteration:
pass
sequence = b"".join(s)
# seq =
seq = Nuc_Seq(ident,
sequence,
definition,
None,-1,
tags)
definition=definition,
quality=None,
offset=-1,
tags=tags)
yield seq

View File

@ -6,7 +6,7 @@ Created on 30 mars 2016
@author: coissac
'''
from obitools3.dms.obiseq import Nuc_Seq
from obitools3.dms.obiseq cimport Nuc_Seq
def fastqIterator(lineiterator,
@ -74,10 +74,9 @@ def fastqWithQualityIterator(lineiterator,
else:
hline = firstline
for line in i:
if read >= ionly:
if ionly >= 0 and read >= ionly:
break
ident,tags,definition = parseHeader(hline)
@ -87,9 +86,10 @@ def fastqWithQualityIterator(lineiterator,
seq = Nuc_Seq(ident,
sequence,
definition,
quality,qualityoffset,
tags)
definition=definition,
quality=quality,
offset=qualityoffset,
tags=tags)
yield seq
@ -152,7 +152,7 @@ def fastqWithoutQualityIterator(lineiterator,
for line in i:
if read >= ionly:
if ionly >= 0 and read >= ionly:
break
ident,tags,definition = parseHeader(hline)
@ -162,9 +162,10 @@ def fastqWithoutQualityIterator(lineiterator,
seq = Nuc_Seq(ident,
sequence,
definition,
None,-1,
tags)
definition=definition,
quality=None,
offset=-1,
tags=tags)
yield seq

View File

@ -62,25 +62,28 @@ def entryIteratorFactory(lineiterator,
elif is_ngsfilter_line(first):
format=b"ngsfilter"
# TODO Temporary fix
first=None
lineiterator.seek(0)
if format==b'fasta':
if seqtype == b'nuc':
return (fastaNucIterator(lineiterator,
skip,only,
first),
skip=skip,only=only,
firstline=first,
buffersize=buffersize),
Nuc_Seq)
else:
raise NotImplementedError()
elif format==b'fastq':
return (fastqIterator(lineiterator,
skip,only,
qualityoffset,
first),
skip=skip,only=only,
qualityoffset=qualityoffset,
noquality=noquality,
firstline=first,
buffersize=buffersize),
Nuc_Seq)
raise NotImplementedError('File format not yet implemented')

View File

@ -364,12 +364,15 @@ def open_uri(uri,
if qualifiers[b"seqtype"]==b"nuc":
objclass = Nuc_Seq
if format==b"fasta":
iseq = fastaNucIterator(file,skip,only)
iseq = fastaNucIterator(file,
skip=skip,
only=only)
elif format==b"fastq":
iseq = fastqIterator(file,
skip,only,
offset,
noquality)
skip=skip,
only=only,
offset=offset,
noquality=noquality)
else:
raise NotImplementedError('Sequence file format not implemented')
elif qualifiers[b"seqtype"]==b"prot":
@ -389,8 +392,7 @@ def open_uri(uri,
blanklineskip,
commentchar)
tmpdms = get_temp_dms()
#tmpdms = get_temp_dms()
return (file, iseq, objclass, urib)