Cython API: Various fixes in input handlers (parsers, openers, etc.).
Mostly working, but not yet bug-free.
This commit is contained in:
@ -6,7 +6,9 @@ Created on 30 mars 2016
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from obitools3.dms.obiseq import Nuc_Seq
|
||||
import types
|
||||
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
|
||||
|
||||
def fastaIterator(lineiterator,
|
||||
@ -48,7 +50,7 @@ def fastaIterator(lineiterator,
|
||||
|
||||
while True:
|
||||
|
||||
if read >= ionly:
|
||||
if ionly >= 0 and read >= ionly:
|
||||
break
|
||||
|
||||
while skipped < skip :
|
||||
@ -79,7 +81,7 @@ def fastaIterator(lineiterator,
|
||||
# definition,
|
||||
# tags=tags,
|
||||
# )
|
||||
|
||||
# TODO
|
||||
yield { "id" : ident,
|
||||
"definition" : definition,
|
||||
"sequence" : sequence,
|
||||
@ -105,65 +107,65 @@ def fastaNucIterator(lineiterator,
|
||||
cdef list s
|
||||
cdef bytes sequence
|
||||
cdef int lines_to_skip, ionly, read
|
||||
# cdef OBI_Seq seq
|
||||
cdef Nuc_Seq seq
|
||||
|
||||
if only is None:
|
||||
ionly=-1
|
||||
ionly = -1
|
||||
else:
|
||||
ionly=int(only)
|
||||
ionly = int(only)
|
||||
|
||||
if isinstance(lineiterator,(str,bytes)):
|
||||
if isinstance(lineiterator, (str, bytes)):
|
||||
lineiterator=uopen(lineiterator)
|
||||
|
||||
if isinstance(lineiterator, types.GeneratorType):
|
||||
iterator = lineiterator
|
||||
if isinstance(lineiterator, LineBuffer):
|
||||
lb=lineiterator
|
||||
iterator = iter(lineiterator)
|
||||
else:
|
||||
lb=LineBuffer(lineiterator,buffersize)
|
||||
|
||||
iterator = iter(LineBuffer(lineiterator, buffersize))
|
||||
|
||||
skipped = 0
|
||||
read = 0
|
||||
i = iter(lb)
|
||||
|
||||
if firstline is None:
|
||||
line = next(i)
|
||||
line = next(iterator)
|
||||
else:
|
||||
line = firstline
|
||||
|
||||
line = firstline
|
||||
|
||||
while True:
|
||||
|
||||
if read >= ionly:
|
||||
|
||||
if ionly >= 0 and read >= ionly:
|
||||
break
|
||||
|
||||
|
||||
while skipped < skip :
|
||||
line = next(i)
|
||||
line = next(iterator)
|
||||
try:
|
||||
while line[0]!='>':
|
||||
line = next(i)
|
||||
line = next(iterator)
|
||||
except StopIteration:
|
||||
pass
|
||||
skipped += 1
|
||||
|
||||
ident,tags,definition = parseHeader(line)
|
||||
s = []
|
||||
line = next(i)
|
||||
|
||||
line = next(iterator)
|
||||
|
||||
try:
|
||||
while line[0]!='>':
|
||||
s.append(str2bytes(line)[0:-1])
|
||||
line = next(i)
|
||||
line = next(iterator)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
sequence = b"".join(s)
|
||||
|
||||
# seq =
|
||||
seq = Nuc_Seq(ident,
|
||||
sequence,
|
||||
definition,
|
||||
None,-1,
|
||||
tags)
|
||||
|
||||
definition=definition,
|
||||
quality=None,
|
||||
offset=-1,
|
||||
tags=tags)
|
||||
|
||||
yield seq
|
||||
|
||||
# yield { "id" : ident,
|
||||
|
@ -6,7 +6,7 @@ Created on 30 mars 2016
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from obitools3.dms.obiseq import Nuc_Seq
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
|
||||
|
||||
def fastqIterator(lineiterator,
|
||||
@ -74,12 +74,11 @@ def fastqWithQualityIterator(lineiterator,
|
||||
else:
|
||||
hline = firstline
|
||||
|
||||
|
||||
for line in i:
|
||||
|
||||
if read >= ionly:
|
||||
if ionly >= 0 and read >= ionly:
|
||||
break
|
||||
|
||||
|
||||
ident,tags,definition = parseHeader(hline)
|
||||
sequence = str2bytes(line[0:-1])
|
||||
next(i)
|
||||
@ -87,9 +86,10 @@ def fastqWithQualityIterator(lineiterator,
|
||||
|
||||
seq = Nuc_Seq(ident,
|
||||
sequence,
|
||||
definition,
|
||||
quality,qualityoffset,
|
||||
tags)
|
||||
definition=definition,
|
||||
quality=quality,
|
||||
offset=qualityoffset,
|
||||
tags=tags)
|
||||
|
||||
yield seq
|
||||
|
||||
@ -149,22 +149,23 @@ def fastqWithoutQualityIterator(lineiterator,
|
||||
hline = next(i)
|
||||
else:
|
||||
hline = firstline
|
||||
|
||||
|
||||
for line in i:
|
||||
|
||||
if read >= ionly:
|
||||
|
||||
if ionly >= 0 and read >= ionly:
|
||||
break
|
||||
|
||||
ident,tags,definition = parseHeader(hline)
|
||||
sequence = str2bytes(line[0:-1])
|
||||
next(i)
|
||||
next(i)
|
||||
|
||||
|
||||
seq = Nuc_Seq(ident,
|
||||
sequence,
|
||||
definition,
|
||||
None,-1,
|
||||
tags)
|
||||
definition=definition,
|
||||
quality=None,
|
||||
offset=-1,
|
||||
tags=tags)
|
||||
|
||||
yield seq
|
||||
|
||||
|
@ -41,11 +41,11 @@ def entryIteratorFactory(lineiterator,
|
||||
if isinstance(lineiterator, LineBuffer):
|
||||
lb=lineiterator
|
||||
else:
|
||||
lb=LineBuffer(lineiterator,buffersize)
|
||||
|
||||
lb=LineBuffer(lineiterator, buffersize)
|
||||
|
||||
i = iter(lb)
|
||||
|
||||
first=next(i)
|
||||
first=next(i)
|
||||
|
||||
format=b"tabular"
|
||||
|
||||
@ -61,26 +61,29 @@ def entryIteratorFactory(lineiterator,
|
||||
format=b"ecopcrfile"
|
||||
elif is_ngsfilter_line(first):
|
||||
format=b"ngsfilter"
|
||||
|
||||
|
||||
# TODO Temporary fix
|
||||
first=None
|
||||
lineiterator.seek(0)
|
||||
|
||||
if format==b'fasta':
|
||||
if seqtype == b'nuc':
|
||||
return (fastaNucIterator(lineiterator,
|
||||
skip,only,
|
||||
first),
|
||||
skip=skip,only=only,
|
||||
firstline=first,
|
||||
buffersize=buffersize),
|
||||
Nuc_Seq)
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
elif format==b'fastq':
|
||||
return (fastqIterator(lineiterator,
|
||||
skip,only,
|
||||
qualityoffset,
|
||||
first),
|
||||
skip=skip,only=only,
|
||||
qualityoffset=qualityoffset,
|
||||
noquality=noquality,
|
||||
firstline=first,
|
||||
buffersize=buffersize),
|
||||
Nuc_Seq)
|
||||
|
||||
|
||||
raise NotImplementedError('File format not yet implemented')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -364,19 +364,22 @@ def open_uri(uri,
|
||||
if qualifiers[b"seqtype"]==b"nuc":
|
||||
objclass = Nuc_Seq
|
||||
if format==b"fasta":
|
||||
iseq = fastaNucIterator(file,skip,only)
|
||||
iseq = fastaNucIterator(file,
|
||||
skip=skip,
|
||||
only=only)
|
||||
elif format==b"fastq":
|
||||
iseq = fastqIterator(file,
|
||||
skip,only,
|
||||
offset,
|
||||
noquality)
|
||||
skip=skip,
|
||||
only=only,
|
||||
offset=offset,
|
||||
noquality=noquality)
|
||||
else:
|
||||
raise NotImplementedError('Sequence file format not implemented')
|
||||
elif qualifiers[b"seqtype"]==b"prot":
|
||||
raise NotImplementedError()
|
||||
else:
|
||||
iseq,objclass = entryIteratorFactory(file,
|
||||
skip,only,
|
||||
skip, only,
|
||||
seqtype,
|
||||
offset,
|
||||
noquality,
|
||||
@ -388,13 +391,12 @@ def open_uri(uri,
|
||||
stripwhite,
|
||||
blanklineskip,
|
||||
commentchar)
|
||||
|
||||
|
||||
tmpdms = get_temp_dms()
|
||||
|
||||
return (file,iseq,objclass,urib)
|
||||
|
||||
|
||||
#tmpdms = get_temp_dms()
|
||||
|
||||
return (file, iseq, objclass, urib)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user